# Alias for quiet package loading: it's a library, so shhh!
shhh <- suppressPackageStartupMessages

shhh(library(mgcv))
shhh(library(dplyr))
shhh(library(ggplot2))
shhh(library(lme4))
shhh(library(tidymv))
shhh(library(gamlss))
shhh(library(gsubfn))
shhh(library(lmerTest))
shhh(library(tidyverse))
shhh(library(boot))
shhh(library(rsample))
shhh(library(plotrix))
shhh(library(ggrepel))
# NOTE: a duplicate library(mgcv) call was removed here.

shhh(library(brms))
shhh(library(bayesplot))
shhh(library(patchwork))
shhh(library(MASS))
shhh(library(tidyr))
shhh(library(extraDistr))
shhh(library(purrr))
# For exercises with Stan code
shhh(library(rstan))
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = FALSE)

shhh(library(car))   # suppressed for consistency with the other loads
shhh(library(coda))
shhh(library(gridExtra))

# Global plotting/printing defaults and a fixed seed for reproducibility.
theme_set(theme_bw())
options(digits = 4)
options(scipen = 999)
set.seed(444)

# Emit a status message from inside a pipe and return the data unchanged.
pipe_message <- function(.data, status) {
  message(status)
  .data
}

## Read in MoTR Data


rate <- 160

file_prefix <- "../reading_measures/provo/provo_f160/"
fnames <- list.files(path = file_prefix)

# Read every per-reader CSV and stack them into one data frame.
# bind_rows() over a list avoids the O(n^2) rbind-in-a-loop pattern, and
# paste0(file_prefix, f) avoids the doubled "/" the old
# paste0(file_prefix, "/", f) produced (file_prefix already ends in "/").
df <- fnames %>%
  lapply(function(f) {
    read.csv(paste0(file_prefix, f)) %>%
      mutate(subj = str_remove(f, "_reading_measures.csv"))
  }) %>%
  bind_rows()

# Filter out readers who don't answer the comprehension questions correctly.
# A paragraph counts as correct iff its correctness flag is 1; a reader's
# accuracy is the mean over paragraphs with a recorded answer.
filter_df <- df %>%
  group_by(para_nr, subj) %>%
  summarise(correct = if_else(unique(correctness) == 1, 1, 0),
            .groups = "drop") %>%  # silences the "grouped output" message
  drop_na() %>%
  group_by(subj) %>%
  summarise(p_correct = mean(correct), .groups = "drop") %>%
  mutate(p_correct = round(p_correct, digits = 2))
`summarise()` has grouped output by 'para_nr'. You can override using the `.groups` argument.
# Keep only the low-accuracy readers (< 80% correct); they will be excluded.
filter_df <- filter(filter_df, p_correct < 0.8)
filter_list <- pull(filter_df, subj)
## reader_3:0.70, reader_60:0.79, reader_76:0.72 , reader_256:0.71 , reader_262:0.57

# Drop the excluded readers and tidy the per-word MoTR reading measures.
raw_df <- df %>%
  filter(! subj %in% c(filter_list)) %>%
  mutate(
    word = str_trim(word),
    subj = as.character(str_remove(subj, "reader_")),
    # If the word is skipped we can't say that it wasn't regressed on the
    # first pass, so mark FPReg with the -1 sentinel (a stand-in for NA).
    FPReg = if_else(total_duration == 0, -1, FPReg)
  ) %>%
  dplyr::select(expr_id, cond_id, para_nr, word, word_nr, first_duration,
                total_duration, gaze_duration, go_past_time, FPReg, subj)

# Number of readers remaining after the accuracy-based exclusions.
length(unique(raw_df$subj))
[1] 46
# Mean first-pass regression rate for the retained readers, excluding the
# -1 sentinel rows (skipped words) set in raw_df above.
df %>%
  filter(! subj %in% c(filter_list)) %>%
  filter(FPReg >= 0) %>%
  dplyr::select(FPReg) %>%
  drop_na() %>%
  summarise( m = mean(FPReg))

# Mean FPFix for the retained readers.
# NOTE(review): FPFix comes straight from the raw df; it was not carried
# into raw_df's select above — confirm the column exists in the input CSVs.
df %>%
  filter(! subj %in% c(filter_list)) %>%
  dplyr::select(FPFix) %>%
  drop_na() %>%
  summarise( m = mean(FPFix))
NA
NA
# Average across subjects: one row per (paragraph, word, metric) with the
# cross-subject mean and the number of contributing subjects.
motr_agg_df <- raw_df %>%
  gather(metric, value, 6:10) %>%
  filter(value >= 0) %>% # removes the -1 ("NA") sentinel values for FPReg

  # ==== Remove skipped words
  # mutate(zero = if_else(metric != "FPReg" & value == 0, TRUE, FALSE)) %>%
  # filter(zero == FALSE) %>%

  drop_na() %>%
  group_by(para_nr, word_nr, word, metric) %>%
  # Exclude per-word duration outliers (> mean + 3 SD within the group);
  # FPReg is a probability and is never treated as an outlier.
  mutate(outlier = if_else(metric != "FPReg" & value > (mean(value) + 3 * sd(value)), TRUE, FALSE)) %>%
  filter(outlier == FALSE) %>%
  summarise(value = mean(value),
            nsubj = length(unique(subj)),
            .groups = "drop") %>% # silences the "grouped output" message
  arrange(para_nr, word_nr) %>%
  rename(text_id = para_nr, word_text_idx = word_nr, motr_value = value)
`summarise()` has grouped output by 'para_nr', 'word_nr', 'word'. You can override using the `.groups` argument.
# View(motr_agg_df)
# write.csv(motr_agg_df, file = "/Users/cui/Desktop/MoTR/pipeline/ancillary_data/motr_agg_df.csv", row.names = FALSE)

## Comparison to Provo

# Read in the Provo word-level predictors: frequency, surprisal and length.
# Renaming trigger_idx inline inside select() keeps the same column order
# as the original select-then-rename.
provo_modeling_df <- read.csv("../ancillary_data/provo_df.csv") %>%
  dplyr::select(text_id, sent_id, word_idx = trigger_idx, word, freq, surp, len)

provo_modeling_df
# View(provo_modeling_df)
# Read in Provo eyetracking data

provo_raw_df = read.csv("../ancillary_data/provo_eyetracking.csv")

# unique(provo_raw_df$Participant_ID)
# length(unique(provo_raw_df$Participant_ID))

# Select and rename the Provo interest-area measures to the MoTR column
# names, then harmonize skipped-word coding with the MoTR preprocessing.
provo_eyetracking_df = provo_raw_df %>%
  dplyr::select(Participant_ID, Text_ID, Sentence_Number, Word_In_Sentence_Number, Word, Word_Number, IA_FIRST_FIX_PROGRESSIVE, IA_FIRST_RUN_DWELL_TIME, IA_DWELL_TIME, IA_REGRESSION_PATH_DURATION, IA_REGRESSION_OUT, IA_SKIP) %>%
  rename( #first_duration = IA_FIRST_FIXATION_DURATION,   
          gaze_duration = IA_FIRST_RUN_DWELL_TIME,
          total_duration = IA_DWELL_TIME,
          go_past_time = IA_REGRESSION_PATH_DURATION,
          subj = Participant_ID,
          text_id = Text_ID,
          sent_id = Sentence_Number,
          word_idx = Word_In_Sentence_Number,
          word_text_idx = Word_Number,   # IA_ID?
          word = Word,      # Word?
          FPReg = IA_REGRESSION_OUT,
          skip = IA_SKIP,
          ff_progressive = IA_FIRST_FIX_PROGRESSIVE) %>%
  # first_duration is proxied by the first-run dwell time
  # (IA_FIRST_FIXATION_DURATION is not selected above).
  mutate(first_duration = gaze_duration) %>%
  # If the first fixation on the word was not progressive, zero out the
  # first-pass measures.
  mutate(gaze_duration = if_else(ff_progressive == 0, 0, as.double(gaze_duration)),
         go_past_time = if_else(ff_progressive == 0, 0, as.double(go_past_time))) %>%
  dplyr::select(-ff_progressive) %>%
  
  # Skipped words (no dwell time at all): zero the durations and mark FPReg
  # with the -1 sentinel, mirroring the MoTR preprocessing above.
  mutate(
    gaze_duration = if_else(total_duration == 0, 0, as.double(gaze_duration)),
      go_past_time = if_else(total_duration == 0, 0, as.double(go_past_time)),
      FPReg = if_else(total_duration == 0, -1, as.double(FPReg)),
      first_duration =  if_else(total_duration == 0, 0, as.double(first_duration)),
  ) %>%
  
  # drop_na() %>%     # will drop the whole row with all the metrics
  gather(metric, value, 7:12) %>%
  filter(value >= 0) %>%          # filter skipped word in eye tracking data for FPReg
  # ==== Remove skipped words
  # mutate(zero = if_else(metric != "FPReg" & value == 0,T, F)) %>%
  # filter(zero == F) %>%
  
  # mutate(value = if_else(is.na(value), as.integer(0), as.integer(value))) %>%
  # mutate(value = if_else(metric != "FPReg" & is.na(value), as.integer(0), as.integer(value))) %>%
  drop_na() %>%
  mutate(word = str_trim(word)) %>%
  mutate(subj = str_remove(subj, "Sub")) %>%
  mutate(subj = as.integer(subj)) %>%
  # Flag per-word duration outliers (> mean + 3 SD within the word group);
  # FPReg and skip are probabilities and are exempt.
    group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    mutate(outlier = if_else(metric != "FPReg" & metric != "skip" & value > (mean(value) + 3 * sd(value) ), T, F)) %>%
    filter(outlier == F) %>%
  ungroup() #%>%

# Aggregate cross-participant data for all subjects: the mean value per
# (text, word, metric), plus the number of contributing subjects.
provo_eyetracking_agg_df <- provo_eyetracking_df %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
  summarise(value = mean(value),
            nsubj = length(unique(subj)),
            .groups = "drop") # silences the "grouped output" message
`summarise()` has grouped output by 'text_id', 'word_text_idx', 'sent_id', 'word_idx', 'word'. You can override using the `.groups` argument.
# View(provo_eyetracking_df)

# View(provo_eyetracking_agg_df)
# write.csv(provo_eyetracking_agg_df, file = "/Users/cui/Desktop/MoTR/pipeline/ancillary_data/provo_eyetracking_agg_df.csv", row.names = FALSE)

# Overall mean regression-out rate in the raw Provo eyetracking data.
provo_raw_df %>%
  dplyr::select(IA_REGRESSION_OUT) %>%
  drop_na() %>%
  summarise( m = mean(IA_REGRESSION_OUT))

# Overall mean skipping rate in the raw Provo eyetracking data.
provo_raw_df %>%
  dplyr::select(IA_SKIP) %>%
  drop_na() %>%
  summarise( m = mean(IA_SKIP))
NA
NA

# Split the eyetracking data in two by subjects to see how well it correlates with itself
provo_eyetracking_subj1_df_temp <- provo_eyetracking_df %>%
  filter(subj <= 42) %>%
  # Shift to a 0-based word index so it lines up with the MoTR indexing.
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
  summarise(value = mean(value),
            .groups = "drop") %>% # silences the "grouped output" message
  rename(value_1 = value) #%>%
`summarise()` has grouped output by 'text_id', 'word_text_idx', 'sent_id', 'word_idx', 'word'. You can override using the `.groups` argument.
  # dplyr::select(-sent_id, -word_idx)

# View(provo_eyetracking_subj1_df_temp)

# Attach the first eyetracking half to the MoTR aggregate; the two text-span
# filters drop regions where the Provo and MoTR word alignments disagree.
provo_eyetracking_subj1_df = merge(provo_eyetracking_subj1_df_temp, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
  arrange(text_id, sent_id, word_idx) %>%
  filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
  filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
  rename(word = word.y) %>%  # keep the MoTR-side word string
  dplyr::select(text_id, word_text_idx, metric, word, value_1)

# View(provo_eyetracking_subj1_df)

# Second half of the split-half design: subjects above 42, aggregated the
# same way as the first half.
provo_eyetracking_subj2_df <- provo_eyetracking_df %>%
  filter(subj > 42) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
  summarise(value = mean(value),
            .groups = "drop") %>% # silences the "grouped output" message
  rename(value_2 = value) %>%
  dplyr::select(-sent_id, -word_idx)
`summarise()` has grouped output by 'text_id', 'word_text_idx', 'sent_id', 'word_idx', 'word'. You can override using the `.groups` argument.
# View(provo_eyetracking_subj2_df)
  
# Merge the two eyetracking halves and drop per-metric outliers from each
# half before computing the split-half correlation.
provo_eyetr_grouped_df = merge(provo_eyetracking_subj2_df, provo_eyetracking_subj1_df, by=c("text_id", "word_text_idx", "metric")) %>%
  # filter(word.x == word.y) %>%
  dplyr::select(-word.y) %>%
  group_by(metric) %>%
  # Durations more than 3 SD above the metric mean are outliers; FPReg and
  # skip are probabilities and are exempt.
  mutate(motr_outlier = if_else(metric != "FPReg" & metric != "skip" & value_1 > (mean(value_1) + 3 * sd(value_1) ), TRUE, FALSE)) %>%
  filter(motr_outlier == FALSE) %>%
  mutate(eyetr_outlier = if_else(metric != "FPReg" & metric != "skip" & value_2 > (mean(value_2) + 3 * sd(value_2) ), TRUE, FALSE)) %>%
  filter(eyetr_outlier == FALSE) %>%
  ungroup() %>%
  gather(measure, value, c("value_1", "value_2")) %>%
  dplyr::select(-motr_outlier, -eyetr_outlier)

# View(provo_eyetr_grouped_df)
# Join the aggregated eyetracking values with the word-level predictors;
# the 0-based word_text_idx matches the MoTR indexing used downstream.
provo_df = merge(provo_eyetracking_agg_df, provo_modeling_df, by=c("text_id", "sent_id", "word_idx")) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  arrange(text_id, sent_id, word_idx) %>%
  rename(eyetr_value = value) 

# Attach the MoTR aggregate and remove per-metric outliers from both
# sources before the correlation analyses.
provo_df = merge(provo_df, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
  arrange(text_id, sent_id, word_idx) %>%
  # Almost all word.x != word.y mismatches are normalization artifacts, so
  # we keep those rows and instead drop two misaligned text spans.
  filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
  filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
  dplyr::select(-word.x, -word.y) %>%
  group_by(metric) %>%
  # Durations more than 3 SD above the metric mean are outliers; FPReg is a
  # probability and is exempt.
  mutate(motr_outlier = if_else(metric != "FPReg" & motr_value > (mean(motr_value) + 3 * sd(motr_value) ), TRUE, FALSE)) %>%
  filter(motr_outlier == FALSE) %>%
  mutate(eyetr_outlier = if_else(metric != "FPReg" & eyetr_value > (mean(eyetr_value) + 3 * sd(eyetr_value) ), TRUE, FALSE)) %>%
  filter(eyetr_outlier == FALSE) %>%
  ungroup() %>%
  gather(measure, value, c("eyetr_value", "motr_value")) %>%
  dplyr::select(-motr_outlier, -eyetr_outlier)
  
# View(provo_df)
# provo_df

## Bayesian – use Stan – MoTR & eye-tracking correlation

print("Gaze Duration")
[1] "Gaze Duration"
# Gaze-duration data in wide form; values are floored at 1 so the log
# transform is defined even where a word's mean duration is 0 (smoothing).
gd_df <- provo_df %>%
  filter(metric == "gaze_duration") %>%
  spread(measure, value) %>%
  mutate(
    eyetr_value = pmax(eyetr_value, 1),
    motr_value = pmax(motr_value, 1),
    eyetr_value_log = log(eyetr_value),
    motr_value_log = log(motr_value)
  )
print(cor.test(gd_df$eyetr_value, gd_df$motr_value)$estimate)
   cor 
0.7874 
print(cor.test(gd_df$eyetr_value_log, gd_df$motr_value_log)$estimate)
   cor 
0.6055 
# View(gd_df)
# Density of each gaze-duration measure (raw and log, both sources).
# NOTE(review): no fill aesthetic is mapped, so scale_fill_brewer() has no
# visible effect here.
gd_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

NA
# center data around 0.

# Raw values are passed to Stan uncentered (centering left commented out).
gd_temp <- gd_df[c("eyetr_value", "motr_value")] %>%
   # mutate(eyetr_value = eyetr_value - mean(eyetr_value),
   #      motr_value = motr_value - mean(motr_value)) %>%
  data.matrix()

# Log-transformed values ARE mean-centered (used for plotting below).
gd_temp_log <- gd_df[c("eyetr_value_log", "motr_value_log")] %>%
 mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log), 
        motr_value_log = motr_value_log - mean(motr_value_log)) %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(gd_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
# Plot the second data matrix gd_temp_log
plot(gd_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")

# Data list for the Stan bivariate correlation model (raw, uncentered data).
gd_data = list(x=gd_temp, N=nrow(gd_temp))

# Fit the bivariate correlation model to the gaze-duration data.
fit_gd = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=gd_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Strip the compiled DSO before saving so the .rds is portable across
# machines; the single-argument paste0() wrapper was a no-op and is removed.
fit_gd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gd, file = "./bayesian_models_cor/motr_eyetr_gaze_duration_cor_drop0s.rds")
print("Go Past Time")
[1] "Go Past Time"
# Go-past-time data in wide form; values are floored at 1 so the log
# transform is defined even where a word's mean duration is 0 (smoothing).
gpt_df <- provo_df %>%
  filter(metric == "go_past_time") %>%
  spread(measure, value) %>%
  mutate(
    eyetr_value = pmax(eyetr_value, 1),
    motr_value = pmax(motr_value, 1),
    eyetr_value_log = log(eyetr_value),
    motr_value_log = log(motr_value)
  )
print(cor.test(gpt_df$eyetr_value, gpt_df$motr_value)$estimate)
   cor 
0.7292 
print(cor.test(gpt_df$eyetr_value_log, gpt_df$motr_value_log)$estimate)
   cor 
0.5889 
# Density of each go-past-time measure (raw and log, both sources).
gpt_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Raw values for Stan (uncentered), as for gaze duration above.
gpt_temp <- gpt_df[c("eyetr_value", "motr_value")] %>% data.matrix()

# Mean-centered log values, used only for the diagnostic plot below.
gpt_temp_log <- gpt_df[c("eyetr_value_log", "motr_value_log")] %>%
 mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log), 
        motr_value_log = motr_value_log - mean(motr_value_log)) %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Raw (not log-transformed) eyetracking vs. MoTR go-past times
plot(gpt_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
# Centered log-transformed version
plot(gpt_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")

# ------- fit model go past time ----------
gpt_data = list(x=gpt_temp, N=nrow(gpt_temp))
fit_gpt = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=gpt_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Strip the compiled DSO before saving so the .rds is portable across
# machines; the single-argument paste0() wrapper was a no-op and is removed.
fit_gpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gpt, file = "./bayesian_models_cor/motr_eyetr_go_past_time_cor_drop0s.rds")
print("Total Duration")
[1] "Total Duration"
# Total-duration data in wide form; values are floored at 1 so the log
# transform is defined even where a word's mean duration is 0 (smoothing).
td_df <- provo_df %>%
  filter(metric == "total_duration") %>%
  spread(measure, value) %>%
  mutate(
    eyetr_value = pmax(eyetr_value, 1),
    motr_value = pmax(motr_value, 1),
    eyetr_value_log = log(eyetr_value),
    motr_value_log = log(motr_value)
  )
print(cor.test(td_df$eyetr_value, td_df$motr_value)$estimate)
   cor 
0.7601 
print(cor.test(td_df$eyetr_value_log, td_df$motr_value_log)$estimate)
   cor 
0.6421 
# Density of each total-duration measure (raw and log, both sources).
td_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Raw values for Stan (uncentered), as for gaze duration above.
td_temp <- td_df[c("eyetr_value", "motr_value")] %>% data.matrix()

# Mean-centered log values, used only for the diagnostic plot below.
td_temp_log <- td_df[c("eyetr_value_log", "motr_value_log")] %>%
 mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log), 
        motr_value_log = motr_value_log - mean(motr_value_log)) %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix td_temp
plot(td_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
# Plot the second data matrix td_temp_log
plot(td_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")

# ------- fit model total duration ----------
td_data = list(x=td_temp, N=nrow(td_temp))
fit_td = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=td_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Strip the compiled DSO before saving so the .rds is portable across
# machines; the single-argument paste0() wrapper was a no-op and is removed.
fit_td@stanmodel@dso <- new("cxxdso")
saveRDS(fit_td, file = "./bayesian_models_cor/motr_eyetr_total_duration_cor.rds")
print("First Pass Regression Prob.")
[1] "First Pass Regression Prob."
# First-pass regression probabilities in wide form. Values are floored at
# 1e-5, presumably to keep them inside the beta model's open (0, 1)
# support — confirm against the Stan model.
reg_df <- provo_df %>%
  filter(metric == "FPReg") %>%
  spread(measure, value) %>%
  mutate(
    eyetr_value = pmax(eyetr_value, 1e-5),
    motr_value = pmax(motr_value, 1e-5)
  )
print(cor.test(reg_df$eyetr_value, reg_df$motr_value)$estimate)
   cor 
0.2454 
# View(reg_df)
# Density of the FPReg probabilities from both sources.
# NOTE(review): no fill aesthetic is mapped, so scale_fill_brewer() has no
# visible effect here.
reg_df %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Matrix of (eyetracking, MoTR) regression probabilities for Stan.
reg_temp <- reg_df[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area (only one panel is drawn for FPReg).
par(mfrow = c(1, 2))

# Scatter of the raw regression probabilities (no log transform here).
plot(reg_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")

# ------- fit model FPReg ----------
reg_data = list(x=reg_temp, N=nrow(reg_temp))
fit_reg = stan(
  file="stan_models/bivariate_beta_correlation_reg.stan", 
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Strip the compiled DSO before saving so the .rds is portable across
# machines; the single-argument paste0() wrapper was a no-op and is removed.
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = "./bayesian_models_cor/motr_eyetr_FPReg_cor_drop0s.rds")
# models with all 0s
# NOTE(review): this overwrites the fits computed above with the previously
# saved "with 0s" variants; the saves above wrote *_drop0s.rds files, so
# these are different model runs — confirm which variant is intended.
fit_gd = readRDS("./bayesian_models_cor/motr_eyetr_gaze_duration_cor.rds")
fit_gpt = readRDS("./bayesian_models_cor/motr_eyetr_go_past_time_cor.rds")
fit_td = readRDS("./bayesian_models_cor/motr_eyetr_total_duration_cor.rds")
fit_reg = readRDS("./bayesian_models_cor/motr_eyetr_FPReg_cor.rds")

# models for drop 0s
# fit_gd = readRDS("./motr_eyetr_gaze_duration_cor_drop0s.rds")
# fit_gpt = readRDS("./motr_eyetr_go_past_time_cor_drop0s.rds")
# fit_td = readRDS("./motr_eyetr_total_duration_cor_drop0s.rds")
# fit_reg = readRDS("./motr_eyetr_FPReg_cor_drop0s.rds")

print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
print(fit_gd)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           125.79    0.01   1.06    123.70    125.05    125.78    126.52    127.85  6415    1
mu[2]           201.36    0.03   2.02    197.35    200.00    201.35    202.73    205.33  6243    1
sigma[1]         75.24    0.01   0.85     73.59     74.67     75.23     75.80     76.93  5554    1
sigma[2]        141.51    0.02   1.62    138.34    140.42    141.51    142.60    144.67  5947    1
nu               67.21    0.20  17.95     39.60     54.39     64.56     77.13    109.21  8434    1
rho               0.82    0.00   0.01      0.80      0.81      0.82      0.82      0.83  6766    1
cov[1,1]       5662.24    1.72 128.14   5416.22   5575.11   5659.86   5745.30   5917.61  5551    1
cov[1,2]       8685.45    3.11 212.25   8279.31   8538.73   8681.39   8824.04   9105.51  4657    1
cov[2,1]       8685.45    3.11 212.25   8279.31   8538.73   8681.39   8824.04   9105.51  4657    1
cov[2,2]      20027.42    5.94 457.93  19138.56  19716.41  20025.43  20333.73  20929.67  5948    1
x_rand[1]       138.57    0.76  67.06     20.92     90.39    135.44    181.71    278.91  7876    1
x_rand[2]       228.21    1.42 123.41     23.91    135.54    219.29    310.19    484.45  7563    1
attempt           0.11    0.00   0.34      0.00      0.00      0.00      0.00      1.00  8077    1
max_attempts   1000.00     NaN   0.00   1000.00   1000.00   1000.00   1000.00   1000.00   NaN  NaN
lp__         -55825.25    0.03   1.73 -55829.39 -55826.17 -55824.95 -55823.98 -55822.84  3949    1

Samples were drawn using NUTS(diag_e) at Sat Jul 22 14:54:30 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
print(fit_gpt)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           147.00    0.02   1.50    144.05    145.98    147.01    148.00    149.89  6405    1
mu[2]           205.47    0.03   2.45    200.61    203.83    205.51    207.14    210.20  6441    1
sigma[1]         94.99    0.02   1.29     92.47     94.13     95.01     95.86     97.55  5688    1
sigma[2]        155.50    0.03   2.15    151.26    154.09    155.49    156.95    159.73  5766    1
nu                7.54    0.01   0.57      6.53      7.14      7.50      7.89      8.75  6096    1
rho               0.80    0.00   0.01      0.79      0.80      0.80      0.81      0.82  7275    1
cov[1,1]       9025.61    3.26 245.47   8551.40   8860.01   9025.97   9188.35   9516.81  5687    1
cov[1,2]      11835.28    4.75 338.12  11168.95  11610.06  11836.87  12061.87  12507.53  5075    1
cov[2,1]      11835.28    4.75 338.12  11168.95  11610.06  11836.87  12061.87  12507.53  5075    1
cov[2,2]      24186.07    8.79 667.52  22879.89  23743.24  24176.78  24634.26  25514.54  5770    1
x_rand[1]       174.00    1.04  93.42     23.81    107.45    163.64    228.93    381.05  8038    1
x_rand[2]       250.51    1.65 147.29     25.35    144.88    232.92    337.60    580.10  8015    1
attempt           0.17    0.00   0.45      0.00      0.00      0.00      0.00      1.00  8278    1
max_attempts   1000.00     NaN   0.00   1000.00   1000.00   1000.00   1000.00   1000.00   NaN  NaN
lp__         -57729.27    0.03   1.72 -57733.34 -57730.21 -57728.95 -57728.00 -57726.89  3578    1

Samples were drawn using NUTS(diag_e) at Sat Jul 22 15:12:39 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
print(fit_td)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           173.42    0.02   1.42    170.69    172.45    173.41    174.36    176.23  5366    1
mu[2]           250.34    0.03   2.63    245.22    248.58    250.33    252.11    255.51  5883    1
sigma[1]         95.97    0.02   1.23     93.60     95.16     95.97     96.77     98.44  5507    1
sigma[2]        177.77    0.03   2.34    173.20    176.17    177.73    179.36    182.38  5591    1
nu               21.87    0.06   4.44     15.31     18.76     21.22     24.20     32.11  4880    1
rho               0.80    0.00   0.01      0.79      0.79      0.80      0.80      0.81  7369    1
cov[1,1]       9212.50    3.18 235.62   8760.98   9054.56   9209.71   9364.36   9690.76  5503    1
cov[1,2]      13638.88    5.20 366.78  12944.80  13389.27  13630.18  13883.70  14380.99  4976    1
cov[2,1]      13638.88    5.20 366.78  12944.80  13389.27  13630.18  13883.70  14380.99  4976    1
cov[2,2]      31608.56   11.12 830.86  29999.18  31035.03  31589.54  32169.43  33263.08  5587    1
x_rand[1]       190.27    0.98  88.62     36.10    126.07    185.26    247.04    375.18  8190    1
x_rand[2]       285.15    1.82 158.43     27.54    166.89    270.34    384.29    636.46  7595    1
attempt           0.11    0.00   0.35      0.00      0.00      0.00      0.00      1.00  7801    1
max_attempts     10.00     NaN   0.00     10.00     10.00     10.00     10.00     10.00   NaN  NaN
lp__         -58397.55    0.03   1.77 -58401.90 -58398.49 -58397.22 -58396.27 -58395.11  3901    1

Samples were drawn using NUTS(diag_e) at Sat Jul 22 16:14:49 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.--------------------------------------------"
print(fit_reg)
Inference for Stan model: bivariate_correlation_reg.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

               mean se_mean      sd     2.5%      25%      50%      75%    97.5% n_eff Rhat
alpha[1]       0.85    0.00    0.02     0.81     0.84     0.85     0.86     0.89  5443    1
alpha[2]       0.15    0.00    0.00     0.14     0.14     0.15     0.15     0.15  7702    1
beta[1]        4.64    0.00    0.14     4.37     4.54     4.64     4.73     4.92  5975    1
beta[2]        2.52    0.00    0.12     2.29     2.44     2.52     2.60     2.76  7877    1
L[1,1]         1.00     NaN    0.00     1.00     1.00     1.00     1.00     1.00   NaN  NaN
L[1,2]         0.00     NaN    0.00     0.00     0.00     0.00     0.00     0.00   NaN  NaN
L[2,1]         0.45    0.00    0.38    -0.41     0.18     0.52     0.77     0.95  8489    1
L[2,2]         0.78    0.00    0.21     0.31     0.64     0.85     0.96     1.00  6201    1
mu[1]          2.34    0.00    0.05     2.25     2.31     2.34     2.37     2.44  5468    1
mu[2]          1.16    0.00    0.00     1.15     1.16     1.16     1.16     1.16  7701    1
sigma[1]     104.39    0.19   14.73    78.70    93.94   103.18   113.48   136.81  5984    1
sigma[2]      12.55    0.02    1.50     9.91    11.50    12.44    13.51    15.76  7872    1
rho[1,1]       1.00     NaN    0.00     1.00     1.00     1.00     1.00     1.00   NaN  NaN
rho[1,2]       0.45    0.00    0.38    -0.41     0.18     0.52     0.77     0.95  8489    1
rho[2,1]       0.45    0.00    0.38    -0.41     0.18     0.52     0.77     0.95  8489    1
rho[2,2]       1.00    0.00    0.00     1.00     1.00     1.00     1.00     1.00   592    1
Sigma[1,1] 11114.32   41.44 3195.37  6193.09  8824.35 10646.59 12878.40 18717.61  5946    1
Sigma[1,2]   583.46    5.69  518.49  -527.26   234.91   654.32   971.41  1433.33  8291    1
Sigma[2,1]   583.46    5.69  518.49  -527.26   234.91   654.32   971.41  1433.33  8291    1
Sigma[2,2]   159.88    0.44   38.81    98.15   132.31   154.69   182.47   248.29  7789    1
x_rand[1]      0.16    0.00    0.14     0.00     0.05     0.12     0.23     0.53  7955    1
x_rand[2]      0.06    0.00    0.12     0.00     0.00     0.00     0.05     0.45  7762    1
lp__       14798.20    0.03    1.56 14794.21 14797.42 14798.55 14799.33 14800.22  3508    1

Samples were drawn using NUTS(diag_e) at Sat Jul 22 20:39:27 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
# MCMC diagnostics: trace plots, per-chain densities and interval plots for
# each fitted model (all parameters unless pars= is given).
# stan_trace(fit_gd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_gd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_gd, pars=c("rho", "mu", "sigma", "nu"))

# Gaze Duration
stan_trace(fit_gd)
stan_dens(fit_gd, separate_chains = TRUE)
stan_plot(fit_gd)

# Go Past Time
stan_trace(fit_gpt)
stan_dens(fit_gpt, separate_chains = TRUE)
stan_plot(fit_gpt)

# Total Duration
stan_trace(fit_td)
stan_dens(fit_td, separate_chains = TRUE)
stan_plot(fit_td)

# FPReg
stan_trace(fit_reg)
stan_dens(fit_reg, separate_chains = TRUE)
stan_plot(fit_reg)
# Per-parameter trace/density pairs for the gaze-duration model, intended
# for the (commented-out) grid.arrange() layout below.
p1 <- stan_trace(fit_gd, pars = 'rho', inc_warmup = FALSE)
p2 <- stan_dens(fit_gd, pars = 'rho', separate_chains = TRUE)
p3 <- stan_trace(fit_gd, pars = 'mu[1]', inc_warmup = FALSE)
p4 <- stan_dens(fit_gd, pars = 'mu[1]', separate_chains = TRUE)
p5 <- stan_trace(fit_gd, pars = 'mu[2]', inc_warmup = FALSE)
p6 <- stan_dens(fit_gd, pars = 'mu[2]', separate_chains = TRUE)
p7 <- stan_trace(fit_gd, pars = 'sigma[1]', inc_warmup = FALSE)
p8 <- stan_dens(fit_gd, pars = 'sigma[1]', separate_chains = TRUE)
p9 <- stan_trace(fit_gd, pars = 'sigma[2]', inc_warmup = FALSE)
p10 <- stan_dens(fit_gd, pars = 'sigma[2]', separate_chains = TRUE)
p11 <- stan_trace(fit_gd, pars = 'nu', inc_warmup = FALSE)
p12 <- stan_dens(fit_gd, pars = 'nu', separate_chains = TRUE)


# Use grid.arrange() to arrange the plots
# grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, ncol=2, nrow=6)
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
# Posterior summary of the gaze-duration correlation: mean, 95% credible
# interval and 95% HPD interval.
rho_gd <- as.numeric(extract(fit_gd, "rho")[[1]])
# Use rho_mean rather than `mean` to avoid shadowing base::mean().
rho_mean <- mean(rho_gd)
crI <- quantile(rho_gd, c(.025, .975))
# 95% highest posterior density interval (the old name `hpd99` was misleading).
hpd95 <- HPDinterval(as.mcmc(rho_gd), prob = 0.95)
cat("Mean: ", rho_mean,
    "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.8156
HPD: [0.8029, 0.8278]
crI: [0.8029, 0.8278]
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
# Posterior summary of the go-past-time correlation: mean, 95% credible
# interval and 95% HPD interval.
rho_gpt <- as.numeric(extract(fit_gpt, "rho")[[1]])
# Use rho_mean rather than `mean` to avoid shadowing base::mean().
rho_mean <- mean(rho_gpt)
crI <- quantile(rho_gpt, c(.025, .975))
# 95% highest posterior density interval (the old name `hpd99` was misleading).
hpd95 <- HPDinterval(as.mcmc(rho_gpt), prob = 0.95)
cat("Mean: ", rho_mean,
    "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.8011
HPD: [0.7863, 0.8161]
crI: [0.7857, 0.8157]
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
# Posterior summary of the total-duration correlation: mean, 95% credible
# interval and 95% HPD interval.
rho_td <- as.numeric(extract(fit_td, "rho")[[1]])
# Use rho_mean rather than `mean` to avoid shadowing base::mean().
rho_mean <- mean(rho_td)
crI <- quantile(rho_td, c(.025, .975))
# 95% highest posterior density interval (the old name `hpd99` was misleading).
hpd95 <- HPDinterval(as.mcmc(rho_td), prob = 0.95)
cat("Mean: ", rho_mean,
    "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.7993
HPD: [0.7847, 0.8127]
crI: [0.785, 0.8131]
print('---------------------------- First Pass Regression --------------------------------------------')
[1] "---------------------------- First Pass Regression --------------------------------------------"
# Posterior summary of the FPReg correlation (off-diagonal of the beta
# model's correlation matrix): mean, 95% credible and 95% HPD intervals.
rho_reg <- as.numeric(extract(fit_reg, "rho[1, 2]")[[1]])
# Use rho_mean rather than `mean` to avoid shadowing base::mean().
rho_mean <- mean(rho_reg)
crI <- quantile(rho_reg, c(.025, .975))
# 95% highest posterior density interval (the old name `hpd99` was misleading).
hpd95 <- HPDinterval(as.mcmc(rho_reg), prob = 0.95)
cat("Mean: ", rho_mean,
    "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]", sep = "")
Mean: 0.4456
HPD: [-0.2832, 0.9833]
crI: [-0.4076, 0.9497]
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
# Posterior predictive check for gaze duration: simulated draws (black)
# overlaid with the observed data (red) and posterior-predictive ellipses.
gd_rand <- extract(fit_gd, "x_rand")[[1]]
# x_rand_filtered <- x_rand[apply(x_rand, 1, function(x) all(x > 0)),]
# x_rand_filtered

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 400), ylim=c(0, 700), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(gd_rand[,1], gd_rand[,2], col = "black", pch = 16)
# add points for gd_temp (the observed data) with color red
points(gd_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(gd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(gd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")


print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
# Posterior predictive check for go-past time: simulated draws (black)
# overlaid with the observed data (red) and posterior-predictive ellipses.
gpt_rand <- extract(fit_gpt, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Go Past Time") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(gpt_rand[,1], gpt_rand[,2], col = "black", pch = 16)
# add points for gpt_temp (the observed data) with color red
points(gpt_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(gpt_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(gpt_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")


print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
# ------- Total Duration: posterior predictive vs. observed ------------------
td_rand <- extract(fit_td, "x_rand")[[1]]

# Blank canvas with fixed limits; type = "n" suppresses drawing.
plot(1, 1, xlim = c(0, 800), ylim = c(0, 1200), type = "n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Total Duration")

# Simulated draws in black.
points(td_rand[, 1], td_rand[, 2], col = "black", pch = 16)
# Observed total-duration pairs (td_temp) in red (stale "gd_temp" comment fixed).
points(td_temp, pch = 16, col = "red")

# Concentration ellipses for the simulated draws (TRUE/FALSE, not T/F).
dataEllipse(td_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(td_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")


print('---------------------------- First Pass Regression --------------------------------------------')
[1] "---------------------------- First Pass Regression --------------------------------------------"
# ------- First Pass Regression: posterior predictive vs. observed -----------
reg_rand <- extract(fit_reg, "x_rand")[[1]]

# Blank canvas; FPReg probabilities live in [0, 1].
plot(1, 1, xlim = c(0, 1), ylim = c(0, 1), type = "n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")

# Simulated draws in black.
points(reg_rand[, 1], reg_rand[, 2], col = "black", pch = 16)
# Observed FPReg pairs (reg_temp) in red (stale "gd_temp" comment fixed).
points(reg_temp, pch = 16, col = "red")

# Concentration ellipses for the simulated draws (TRUE/FALSE, not T/F).
dataEllipse(reg_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(reg_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")

NA
NA

Model the MoTR–eye-tracking FPReg correlation (eye-tracking FPReg < 0.3)

print("First Pass Regression Prob. all and  < 0.3")
[1] "First Pass Regression Prob. all and  < 0.3"
# FPReg observations, all rows: clamp any value below 1e-5 (including exact
# zeros) to 1e-5 so downstream log/Beta-style models stay defined.
reg_df_all = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5))

# Same, but rows where either measure is exactly 0 are dropped BEFORE the
# clamp, then restricted to low eye-tracking regression probability (< 0.3).
reg_df_low_drop0 = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value < 0.3)

# Low-probability subset (< 0.3) with zero rows kept (clamped to 1e-5).
reg_df_low = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value < 0.3)
  # mutate(eyetr_value = exp(eyetr_value),
         # motr_value = exp(motr_value)
         # )
# View(reg_df)

print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$estimate)
   cor 
0.2454 
print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$p.value)
[1] 0.000000000000000000000000000000000004731
print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$estimate)
    cor 
0.09488 
print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$p.value)
[1] 0.000006321
print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$estimate)
    cor 
0.05585 
print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$p.value)
[1] 0.1193
# View(reg_df)
# Density of each measure in the low (< 0.3) subset. Columns 12:13 are
# selected positionally (eyetr_value / motr_value after spread()) — fragile
# if provo_df gains or loses columns upstream.
reg_df_low %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  # NOTE(review): no fill aesthetic is mapped, so this scale has no effect.
  scale_fill_brewer(palette = "Set1")

# Two-column (eyetr, motr) matrices as input for the Stan bivariate models.
reg_temp_all <- reg_df_all[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low <- reg_df_low[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low_drop0 <- reg_df_low_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area with two side-by-side panels.
# NOTE(review): only one plot follows before the next par() call, so the
# second panel stays empty; par() is also never restored afterwards.
par(mfrow = c(1, 2))

# Scatter of the low (< 0.3) FPReg matrix (stale "td_temp" comment removed).
plot(reg_temp_low, pch = 16, col = "blue",
     main = "Not Log-Transformed")

# ------- Fit bivariate model: FPReg, all data ----------
# NOTE(review): the original header said "FPReg < 0.3", but the data passed
# in is reg_temp_all (all rows, zeros clamped); the saved filename says
# "_drop0s" although zeros were clamped rather than dropped — confirm the
# intended dataset/filename pairing.
reg_data = list(x=reg_temp_all, N=nrow(reg_temp_all))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Blank out the compiled DSO slot before serializing — presumably to keep the
# .rds free of environment-specific compiled code; TODO confirm.
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models_cor/motr_eyetr_FPReg_cor_all_data_drop0s.rds"))

Model the MoTR–eye-tracking FPReg correlation (eye-tracking FPReg >= 0.3)

print("First Pass Regression Prob. >= 0.3")
[1] "First Pass Regression Prob. >= 0.3"
# High eye-tracking regression probability (>= 0.3), zero rows dropped
# before the 1e-5 clamp.
reg_df_high_drop0 = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value >= 0.3)

# High-probability subset (>= 0.3) with zero rows kept (clamped to 1e-5).
reg_df_high = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value >= 0.3)
  # mutate(eyetr_value = exp(eyetr_value),
         # motr_value = exp(motr_value)
         # )
# View(reg_df)

print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$estimate)
   cor 
0.3952 
print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$p.value)
[1] 0.000000000009391
print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$estimate)
  cor 
0.489 
print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$p.value)
[1] 0.000000001007
# View(reg_df)
# Density of each measure in the high (>= 0.3) subset. Columns 12:13 are
# selected positionally — fragile if provo_df's column set changes upstream.
reg_df_high %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  # NOTE(review): no fill aesthetic is mapped, so this scale has no effect.
  scale_fill_brewer(palette = "Set1")

# Two-column (eyetr, motr) matrices as input for the Stan bivariate models.
reg_temp_high <- reg_df_high[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_high_drop0 <- reg_df_high_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area with three side-by-side panels
# (all data / low subset / high subset). par() is not restored afterwards.
par(mfrow = c(1, 3))

# Scatterplots of the three FPReg matrices (stale "td_temp" comment removed).
plot(reg_temp_all, pch = 16, col = "blue",
     main = "FPReg not logged all data")
plot(reg_temp_low, pch = 16, col = "blue",
     main = "FPReg not logged eyetr < 0.3 ")
plot(reg_temp_high, pch = 16, col = "blue",
     main = "FPReg not logged eyetr >= 0.3")

# ------- Fit bivariate model: FPReg >= 0.3, zero rows dropped ----------
reg_data = list(x=reg_temp_high_drop0, N=nrow(reg_temp_high_drop0))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Blank out the compiled DSO slot before serializing — presumably to keep the
# .rds free of environment-specific compiled code; TODO confirm.
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models_cor/motr_eyetr_FPReg_cor_03-1_drop0s.rds"))
# Reload the six previously saved FPReg correlation fits: full data and the
# < 0.3 / >= 0.3 eye-tracking splits, each with and without zero rows.
fit_mreg_all = readRDS("./bayesian_models_cor/motr_eyetr_FPReg_cor_all_data.rds")
fit_mreg_all_drop0 = readRDS("./bayesian_models_cor/motr_eyetr_FPReg_cor_all_data_drop0s.rds")
fit_mreg_low = readRDS("./bayesian_models_cor/motr_eyetr_FPReg_cor_00-03.rds")
fit_mreg_low_drop0 = readRDS("./bayesian_models_cor/motr_eyetr_FPReg_cor_00-03_drop0s.rds")
fit_mreg_high = readRDS("./bayesian_models_cor/motr_eyetr_FPReg_cor_03-1.rds")
fit_mreg_high_drop0 = readRDS("./bayesian_models_cor/motr_eyetr_FPReg_cor_03-1_drop0s.rds")

print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
[1] "---------------------------- First Pass Regression Prob. all data --------------------------------------------"
print(fit_mreg_all)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
mu[1]           0.12    0.00 0.00    0.12    0.12    0.12    0.13    0.13  5811    1
mu[2]           0.03    0.00 0.00    0.02    0.03    0.03    0.03    0.03  3384    1
sigma[1]        0.08    0.00 0.00    0.07    0.07    0.08    0.08    0.08  3694    1
sigma[2]        0.05    0.00 0.00    0.05    0.05    0.05    0.05    0.06  3040    1
nu              2.42    0.00 0.14    2.16    2.33    2.42    2.52    2.71  3250    1
rho             0.16    0.00 0.02    0.11    0.14    0.16    0.17    0.20  8176    1
cov[1,1]        0.01    0.00 0.00    0.01    0.01    0.01    0.01    0.01  3693    1
cov[1,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  6348    1
cov[2,1]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  6348    1
cov[2,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  3053    1
x_rand[1]       0.16    0.00 0.12    0.02    0.09    0.14    0.20    0.40  7811    1
x_rand[2]       0.07    0.00 0.08    0.00    0.03    0.05    0.09    0.25  7966    1
attempt         0.63    0.01 1.03    0.00    0.00    0.00    1.00    3.00  7919    1
max_attempts   10.00     NaN 0.00   10.00   10.00   10.00   10.00   10.00   NaN  NaN
lp__         7450.58    0.03 1.78 7446.22 7449.65 7450.92 7451.89 7452.98  3519    1

Samples were drawn using NUTS(diag_e) at Sat Aug  5 23:08:09 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob. all data no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob. all data no 0s--------------------------------------------"
print(fit_mreg_all_drop0)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
mu[1]           0.15    0.00 0.00    0.14    0.15    0.15    0.15    0.16  8507    1
mu[2]           0.14    0.00 0.00    0.13    0.14    0.14    0.14    0.14  7923    1
sigma[1]        0.09    0.00 0.00    0.08    0.08    0.09    0.09    0.09  7186    1
sigma[2]        0.06    0.00 0.00    0.06    0.06    0.06    0.06    0.06  6227    1
nu              2.78    0.00 0.23    2.37    2.62    2.77    2.93    3.26  7008    1
rho             0.18    0.00 0.04    0.11    0.16    0.18    0.21    0.26  9833    1
cov[1,1]        0.01    0.00 0.00    0.01    0.01    0.01    0.01    0.01  7173    1
cov[1,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  8599    1
cov[2,1]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  8599    1
cov[2,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  6234    1
x_rand[1]       0.18    0.00 0.13    0.02    0.10    0.16    0.22    0.43  7699    1
x_rand[2]       0.15    0.00 0.08    0.03    0.10    0.14    0.18    0.34  8175    1
attempt         0.16    0.00 0.43    0.00    0.00    0.00    0.00    1.00  7917    1
max_attempts   10.00     NaN 0.00   10.00   10.00   10.00   10.00   10.00   NaN  NaN
lp__         2566.44    0.03 1.75 2562.18 2565.53 2566.76 2567.72 2568.88  3487    1

Samples were drawn using NUTS(diag_e) at Sat Aug  5 23:18:11 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------"
print(fit_mreg_low)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
mu[1]           0.12    0.00 0.00    0.11    0.12    0.12    0.12    0.12  8566    1
mu[2]           0.03    0.00 0.00    0.03    0.03    0.03    0.04    0.04  4981    1
sigma[1]        0.06    0.00 0.00    0.06    0.06    0.06    0.06    0.07  7280    1
sigma[2]        0.06    0.00 0.00    0.06    0.06    0.06    0.06    0.07  4552    1
nu              5.54    0.01 0.51    4.62    5.19    5.51    5.86    6.62  4637    1
rho             0.10    0.00 0.02    0.06    0.09    0.10    0.12    0.15  8596    1
cov[1,1]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  7299    1
cov[1,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  8259    1
cov[2,1]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  8259    1
cov[2,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  4571    1
x_rand[1]       0.13    0.00 0.07    0.02    0.08    0.12    0.17    0.28  8277    1
x_rand[2]       0.07    0.00 0.06    0.00    0.03    0.06    0.10    0.21  7280    1
attempt         0.53    0.01 0.90    0.00    0.00    0.00    1.00    3.00  8006    1
max_attempts   10.00     NaN 0.00   10.00   10.00   10.00   10.00   10.00   NaN  NaN
lp__         7794.82    0.03 1.78 7790.50 7793.88 7795.16 7796.13 7797.26  3232    1

Samples were drawn using NUTS(diag_e) at Sat Aug  5 20:52:03 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.< 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.< 0.3 no 0s--------------------------------------------"
print(fit_mreg_low_drop0)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
mu[1]           0.13    0.00 0.00    0.13    0.13    0.13    0.13    0.14 10534    1
mu[2]           0.13    0.00 0.00    0.13    0.13    0.13    0.14    0.14  8839    1
sigma[1]        0.06    0.00 0.00    0.06    0.06    0.06    0.06    0.07  8534    1
sigma[2]        0.06    0.00 0.00    0.06    0.06    0.06    0.07    0.07  7122    1
nu              6.50    0.01 0.96    4.90    5.83    6.40    7.06    8.73  6790    1
rho             0.07    0.00 0.04    0.00    0.05    0.07    0.10    0.15 10356    1
cov[1,1]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  8526    1
cov[1,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00 10435    1
cov[2,1]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00 10435    1
cov[2,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  7123    1
x_rand[1]       0.14    0.00 0.07    0.02    0.09    0.13    0.18    0.28  8031    1
x_rand[2]       0.14    0.00 0.07    0.02    0.10    0.14    0.18    0.29  8006    1
attempt         0.08    0.00 0.30    0.00    0.00    0.00    0.00    1.00  7642    1
max_attempts   10.00     NaN 0.00   10.00   10.00   10.00   10.00   10.00   NaN  NaN
lp__         2737.21    0.03 1.76 2732.81 2736.27 2737.56 2738.50 2739.62  3992    1

Samples were drawn using NUTS(diag_e) at Sat Aug  5 20:30:02 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------"
print(fit_mreg_high)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

               mean se_mean    sd   2.5%    25%    50%    75%  97.5% n_eff Rhat
mu[1]          0.45    0.00  0.01   0.44   0.45   0.45   0.46   0.47  5045    1
mu[2]          0.15    0.00  0.01   0.13   0.15   0.15   0.16   0.18  4628    1
sigma[1]       0.11    0.00  0.01   0.10   0.11   0.11   0.12   0.12  5622    1
sigma[2]       0.15    0.00  0.01   0.13   0.15   0.15   0.16   0.17  4499    1
nu            25.71    0.18 12.83   9.59  16.53  22.89  31.70  58.03  5109    1
rho            0.41    0.00  0.06   0.30   0.37   0.41   0.45   0.51  7137    1
cov[1,1]       0.01    0.00  0.00   0.01   0.01   0.01   0.01   0.02  5625    1
cov[1,2]       0.01    0.00  0.00   0.00   0.01   0.01   0.01   0.01  4842    1
cov[2,1]       0.01    0.00  0.00   0.00   0.01   0.01   0.01   0.01  4842    1
cov[2,2]       0.02    0.00  0.00   0.02   0.02   0.02   0.03   0.03  4553    1
x_rand[1]      0.47    0.00  0.11   0.25   0.39   0.47   0.54   0.70  7964    1
x_rand[2]      0.20    0.00  0.13   0.01   0.10   0.19   0.28   0.49  7829    1
attempt        0.19    0.01  0.47   0.00   0.00   0.00   0.00   1.00  7881    1
max_attempts  10.00     NaN  0.00  10.00  10.00  10.00  10.00  10.00   NaN  NaN
lp__         579.26    0.03  1.75 575.10 578.35 579.56 580.56 581.68  3529    1

Samples were drawn using NUTS(diag_e) at Sat Aug  5 22:25:38 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.>= 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.>= 0.3 no 0s--------------------------------------------"
print(fit_mreg_high_drop0)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

               mean se_mean    sd   2.5%    25%    50%    75%  97.5% n_eff Rhat
mu[1]          0.48    0.00  0.01   0.46   0.48   0.48   0.49   0.51  6409    1
mu[2]          0.27    0.00  0.01   0.25   0.26   0.27   0.28   0.30  6397    1
sigma[1]       0.12    0.00  0.01   0.11   0.12   0.12   0.13   0.14  6555    1
sigma[2]       0.16    0.00  0.01   0.14   0.15   0.16   0.17   0.18  6281    1
nu            32.85    0.16 15.27  11.84  21.65  30.12  40.80  69.54  8953    1
rho            0.51    0.00  0.07   0.37   0.47   0.52   0.56   0.64  6986    1
cov[1,1]       0.02    0.00  0.00   0.01   0.01   0.02   0.02   0.02  6491    1
cov[1,2]       0.01    0.00  0.00   0.01   0.01   0.01   0.01   0.02  5145    1
cov[2,1]       0.01    0.00  0.00   0.01   0.01   0.01   0.01   0.02  5145    1
cov[2,2]       0.03    0.00  0.00   0.02   0.02   0.03   0.03   0.03  6242    1
x_rand[1]      0.49    0.00  0.13   0.25   0.41   0.49   0.57   0.74  8129    1
x_rand[2]      0.29    0.00  0.15   0.04   0.18   0.28   0.38   0.60  7467    1
attempt        0.06    0.00  0.24   0.00   0.00   0.00   0.00   1.00  7913    1
max_attempts  10.00     NaN  0.00  10.00  10.00  10.00  10.00  10.00   NaN  NaN
lp__         300.28    0.03  1.75 296.05 299.35 300.60 301.58 302.70  3555    1

Samples were drawn using NUTS(diag_e) at Sat Aug  5 22:31:08 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
# MCMC diagnostics (trace, density, interval plots). Only the high (> 0.3)
# fits are rendered below; the others are kept commented out.
# # FPReg all data
# stan_trace(fit_mreg_all)
# stan_dens(fit_mreg_all, separate_chains = TRUE)
# stan_plot(fit_mreg_all)

# stan_trace(fit_mreg_all_drop0)
# stan_dens(fit_mreg_all_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_all_drop0)

# # FPReg < 0.3
# stan_trace(fit_mreg_low)
# stan_dens(fit_mreg_low, separate_chains = TRUE)
# stan_plot(fit_mreg_low)
# 
# stan_trace(fit_mreg_low_drop0)
# stan_dens(fit_mreg_low_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_low_drop0)

# FPReg > 0.3
stan_trace(fit_mreg_high)
'pars' not specified. Showing first 10 parameters by default.

stan_dens(fit_mreg_high, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.

stan_plot(fit_mreg_high)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)

stan_trace(fit_mreg_high_drop0)
'pars' not specified. Showing first 10 parameters by default.

stan_dens(fit_mreg_high_drop0, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.

stan_plot(fit_mreg_high_drop0)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)

print('---------------------------- First Pass Regression all data--------------------------------------------')
[1] "---------------------------- First Pass Regression all data--------------------------------------------"
# FPReg (all data): posterior mean, 95% HPD, and 95% equal-tailed CrI of rho.
rho_mreg_all <- as.numeric(extract(fit_mreg_all, "rho")[[1]])
rho_mean <- mean(rho_mreg_all)  # avoid `mean` variable shadowing base::mean
crI <- quantile(rho_mreg_all, c(.025, .975))
hpd95 <- HPDinterval(as.mcmc(rho_mreg_all), prob = 0.95)  # was misnamed hpd99
cat("Mean: ", rho_mean,
    "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.1574
HPD: [0.1139, 0.2017]
crI: [0.1134, 0.2014]
print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression all data no 0s--------------------------------------------"
# FPReg (all data, zeros dropped): posterior mean, 95% HPD, and 95% CrI of rho.
rho_mreg_all_drop0 <- as.numeric(extract(fit_mreg_all_drop0, "rho")[[1]])
rho_mean <- mean(rho_mreg_all_drop0)  # avoid `mean` variable shadowing base::mean
crI <- quantile(rho_mreg_all_drop0, c(.025, .975))
hpd95 <- HPDinterval(as.mcmc(rho_mreg_all_drop0), prob = 0.95)  # was misnamed hpd99
cat("Mean: ", rho_mean,
    "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.1843
HPD: [0.1106, 0.2573]
crI: [0.1114, 0.2581]
print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3--------------------------------------------"
# FPReg (< 0.3): posterior mean, 95% HPD, and 95% equal-tailed CrI of rho.
rho_mreg_low <- as.numeric(extract(fit_mreg_low, "rho")[[1]])
rho_mean <- mean(rho_mreg_low)  # avoid `mean` variable shadowing base::mean
crI <- quantile(rho_mreg_low, c(.025, .975))
hpd95 <- HPDinterval(as.mcmc(rho_mreg_low), prob = 0.95)  # was misnamed hpd99
cat("Mean: ", rho_mean,
    "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.103
HPD: [0.05775, 0.1468]
crI: [0.05805, 0.1475]
print('---------------------------- First Pass Regression < 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3 no 0s--------------------------------------------"
# FPReg (< 0.3, zeros dropped): posterior mean, 95% HPD, and 95% CrI of rho.
rho_mreg_low_drop0 <- as.numeric(extract(fit_mreg_low_drop0, "rho")[[1]])
rho_mean <- mean(rho_mreg_low_drop0)  # avoid `mean` variable shadowing base::mean
crI <- quantile(rho_mreg_low_drop0, c(.025, .975))
hpd95 <- HPDinterval(as.mcmc(rho_mreg_low_drop0), prob = 0.95)  # was misnamed hpd99
cat("Mean: ", rho_mean,
    "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.07362
HPD: [-0.000616, 0.1514]
crI: [-0.003759, 0.1491]
print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3--------------------------------------------"
# FPReg (>= 0.3): posterior mean, 95% HPD, and 95% equal-tailed CrI of rho.
rho_mreg_high <- as.numeric(extract(fit_mreg_high, "rho")[[1]])
rho_mean <- mean(rho_mreg_high)  # avoid `mean` variable shadowing base::mean
crI <- quantile(rho_mreg_high, c(.025, .975))
hpd95 <- HPDinterval(as.mcmc(rho_mreg_high), prob = 0.95)  # was misnamed hpd99
cat("Mean: ", rho_mean,
    "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.4102
HPD: [0.3001, 0.5155]
crI: [0.2992, 0.5146]
print('---------------------------- First Pass Regression >= 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3 no 0s--------------------------------------------"
# FPReg (>= 0.3, zeros dropped): posterior mean, 95% HPD, and 95% CrI of rho.
rho_mreg_high_drop0 <- as.numeric(extract(fit_mreg_high_drop0, "rho")[[1]])
rho_mean <- mean(rho_mreg_high_drop0)  # avoid `mean` variable shadowing base::mean
crI <- quantile(rho_mreg_high_drop0, c(.025, .975))
hpd95 <- HPDinterval(as.mcmc(rho_mreg_high_drop0), prob = 0.95)  # was misnamed hpd99
cat("Mean: ", rho_mean,
    "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.514
HPD: [0.3797, 0.6456]
crI: [0.3728, 0.6412]
print('---------------------------- First Pass Regression all data --------------------------------------------')
[1] "---------------------------- First Pass Regression all data --------------------------------------------"
# ------- FPReg (all data): posterior predictive vs. observed ----------------
mallreg_rand <- extract(fit_mreg_all, "x_rand")[[1]]
# Blank canvas; FPReg probabilities live in [0, 1].
plot(1, 1, xlim = c(0, 1), ylim = c(0, 1), type = "n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")

# Simulated draws in black.
points(mallreg_rand[, 1], mallreg_rand[, 2], col = "black", pch = 16)
# Observed FPReg pairs (reg_temp_all) in red (stale "gd_temp" comment fixed).
points(reg_temp_all, pch = 16, col = "red")

# Concentration ellipses for the simulated draws (TRUE/FALSE, not T/F).
dataEllipse(mallreg_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(mallreg_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")


print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression all data no 0s--------------------------------------------"
# ------- FPReg (all data, zeros dropped): posterior predictive vs. observed -
mallreg_rand_drop0 <- extract(fit_mreg_all_drop0, "x_rand")[[1]]
# Blank canvas; FPReg probabilities live in [0, 1].
plot(1, 1, xlim = c(0, 1), ylim = c(0, 1), type = "n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")

# Simulated draws in black.
points(mallreg_rand_drop0[, 1], mallreg_rand_drop0[, 2], col = "black", pch = 16)
# Observed FPReg pairs (reg_temp_all) in red (stale "gd_temp" comment fixed).
points(reg_temp_all, pch = 16, col = "red")

# Concentration ellipses for the simulated draws (TRUE/FALSE, not T/F).
dataEllipse(mallreg_rand_drop0, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(mallreg_rand_drop0, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")


print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3 --------------------------------------------"
# Posterior-predictive draws for the FPReg < 0.3 fit.
mlowreg_rand <- extract(fit_mreg_low, "x_rand")[[1]]
# NOTE(review): this prints the entire draws matrix (thousands of rows);
# consider head() or summary() instead of dumping every draw into the report.
print(mlowreg_rand)
          
iterations       [,1]        [,2]
      [1,] 0.11511878 0.058692235
      [2,] 0.10080536 0.082278938
      [3,] 0.07419675 0.057422764
      [4,] 0.06302629 0.127161983
      [5,] 0.06788690 0.035711335
      [6,] 0.17361601 0.099604810
      [7,] 0.17739914 0.032665984
      [8,] 0.15288701 0.078389004
      [9,] 0.08785344 0.063478802
     [10,] 0.09083191 0.081437242
     [11,] 0.12284974 0.047957400
     [12,] 0.31929175 0.073022259
     [13,] 0.07321661 0.146609734
     [14,] 0.13305706 0.003287951
     [15,] 0.08590356 0.006066375
     [16,] 0.16248353 0.071463963
     [17,] 0.13724213 0.025405873
     [18,] 0.08694616 0.071649831
     [19,] 0.08053604 0.007858853
     [20,] 0.09780239 0.123744207
     [21,] 0.13732115 0.106339909
     [22,] 0.10146120 0.063139821
     [23,] 0.16804116 0.209745289
     [24,] 0.06875586 0.051329591
     [25,] 0.20564078 0.157609114
     [26,] 0.11818538 0.079607193
     [27,] 0.02165742 0.014432845
     [28,] 0.13120962 0.067054699
     [29,] 0.18783075 0.013201654
     [30,] 0.13485451 0.215877915
     [31,] 0.12536525 0.016880486
     [32,] 0.15425302 0.052811326
     [33,] 0.14898158 0.022886986
     [34,] 0.06153871 0.062990745
     [35,] 0.23170898 0.061582269
     [36,] 0.04856093 0.040421615
     [37,] 0.10355822 0.100074267
     [38,] 0.10764644 0.094893809
     [39,] 0.08864219 0.115942760
     [40,] 0.20666951 0.043051720
     [41,] 0.10821015 0.016148505
     [42,] 0.15294279 0.011321145
     [43,] 0.14933481 0.056753197
     [44,] 0.17713976 0.084162664
     [45,] 0.09788291 0.114313295
     [46,] 0.13066988 0.106625963
     [47,] 0.10615505 0.088889287
     [48,] 0.20691074 0.026068863
     [49,] 0.24950201 0.067895431
     [50,] 0.10795240 0.117285125
     [51,] 0.08922143 0.057697918
     [52,] 0.03089934 0.017194520
     [53,] 0.16693401 0.124276295
     [54,] 0.11681754 0.072287016
     [55,] 0.14234171 0.123206457
     [56,] 0.14999156 0.133564852
     [57,] 0.10005535 0.103454274
     [58,] 0.16385613 0.044270076
     [59,] 0.16835850 0.032276014
     [60,] 0.11316180 0.027836869
     [61,] 0.07068308 0.002429855
     [62,] 0.14861049 0.030644461
     [63,] 0.08026005 0.096752639
     [64,] 0.19610685 0.040573361
     [65,] 0.09889908 0.062582069
     [66,] 0.12216486 0.007868282
     [67,] 0.14305487 0.040541626
     [68,] 0.15325520 0.059214720
     [69,] 0.09187895 0.055394355
     [70,] 0.17195076 0.042190213
     [71,] 0.12668392 0.186737587
     [72,] 0.18615656 0.165676773
     [73,] 0.22236984 0.088002637
     [74,] 0.16154088 0.046439483
     [75,] 0.15833268 0.043527396
     [76,] 0.01828241 0.004917111
     [77,] 0.08366437 0.048070242
     [78,] 0.16296263 0.098637540
     [79,] 0.01911861 0.011826517
     [80,] 0.13246976 0.017932399
     [81,] 0.13172960 0.099700889
     [82,] 0.13120236 0.030903799
     [83,] 0.03414019 0.029845952
     [84,] 0.11794300 0.058150651
     [85,] 0.16321117 0.016221586
     [86,] 0.13485772 0.044536394
     [87,] 0.08323735 0.037789570
     [88,] 0.12784327 0.024592440
     [89,] 0.11474642 0.125264533
     [90,] 0.30003119 0.141163053
     [91,] 0.14886879 0.032300532
     [92,] 0.15315229 0.048796922
     [93,] 0.17373278 0.149832918
     [94,] 0.18119675 0.010159370
     [95,] 0.06593250 0.015273059
     [96,] 0.11489697 0.090629754
     [97,] 0.05417136 0.069271332
     [98,] 0.15524251 0.004718292
     [99,] 0.10611465 0.131221931
    [100,] 0.21191205 0.071115727
    [101,] 0.17860898 0.036576641
    [102,] 0.11838871 0.067754194
    [103,] 0.04618650 0.076011818
    [104,] 0.07149113 0.099317161
    [105,] 0.09685515 0.076951772
    [106,] 0.16948440 0.051397007
    [107,] 0.16397962 0.066380023
    [108,] 0.35141152 0.103202329
    [109,] 0.08981342 0.022256500
    [110,] 0.08611436 0.103216833
    [111,] 0.16803768 0.041021778
    [112,] 0.04956193 0.015672994
    [113,] 0.13670936 0.000734990
    [114,] 0.23189116 0.055383494
    [115,] 0.13711460 0.047420480
    [116,] 0.16211496 0.101883011
    [117,] 0.11059541 0.137053489
    [118,] 0.11198078 0.172268686
    [119,] 0.10133483 0.165735370
    [120,] 0.05108662 0.078902765
    [121,] 0.12428932 0.171929033
    [122,] 0.04522586 0.052318382
    [123,] 0.05402887 0.002728235
    [124,] 0.15048369 0.079199876
    [125,] 0.05697737 0.104910312
    [126,] 0.07572847 0.147633307
    [127,] 0.12887648 0.049633791
    [128,] 0.22314790 0.041083170
    [129,] 0.22296266 0.059077630
    [130,] 0.21951739 0.255508373
    [131,] 0.00223404 0.055496220
    [132,] 0.07173661 0.054690423
    [133,] 0.07481140 0.137328751
    [134,] 0.07525143 0.085271947
    [135,] 0.04136423 0.072566861
    [136,] 0.03438194 0.021353704
    [137,] 0.14141529 0.067505872
    [138,] 0.09668028 0.074937279
    [139,] 0.02472201 0.016822602
    [140,] 0.11822313 0.026794958
    [141,] 0.14415761 0.006674984
    [142,] 0.18880044 0.046960151
    [143,] 0.05028092 0.025961404
    [144,] 0.12708325 0.088561542
    [145,] 0.08283653 0.072829545
    [146,] 0.15347182 0.023318806
    [147,] 0.01142269 0.063812349
    [148,] 0.12291495 0.067960042
    [149,] 0.04984024 0.160028495
    [150,] 0.25604624 0.085652930
    [151,] 0.04078737 0.015191069
    [152,] 0.08595678 0.052905240
    [153,] 0.13995240 0.219389923
    [154,] 0.22090959 0.042733123
    [155,] 0.18415520 0.104559642
    [156,] 0.08260195 0.012290388
    [157,] 0.10528827 0.101146688
    [158,] 0.04248761 0.008418649
    [159,] 0.18137384 0.113199135
    [160,] 0.14031124 0.038414449
    [161,] 0.03366005 0.176112952
    [162,] 0.14277349 0.017972726
    [163,] 0.28849005 0.088943450
    [164,] 0.21049008 0.150368124
    [165,] 0.20449202 0.128347681
    [166,] 0.20154826 0.133227096
    [167,] 0.20617209 0.045203296
    [168,] 0.14030807 0.030559725
    [169,] 0.06781599 0.026680021
    [170,] 0.14175787 0.122642062
    [171,] 0.11642242 0.028491074
    [172,] 0.09444086 0.067812607
    [173,] 0.08146372 0.071740466
    [174,] 0.21592301 0.049242369
    [175,] 0.05339027 0.085080208
    [176,] 0.23715771 0.162442441
    [177,] 0.14907386 0.080646534
    [178,] 0.16665625 0.010641142
    [179,] 0.08762281 0.109566126
    [180,] 0.16748200 0.203084819
    [181,] 0.19729103 0.110051803
    [182,] 0.10864675 0.183272814
    [183,] 0.02316826 0.103830377
    [184,] 0.19500029 0.183994039
    [185,] 0.12985698 0.125661713
    [186,] 0.03678014 0.081088703
    [187,] 0.01636462 0.040741782
    [188,] 0.14966853 0.037247807
    [189,] 0.16901653 0.049923160
    [190,] 0.04106136 0.096784434
    [191,] 0.12871336 0.059869407
    [192,] 0.06865706 0.237498174
    [193,] 0.26966456 0.085940615
    [194,] 0.12410595 0.090481252
    [195,] 0.16076501 0.021451339
    [196,] 0.03501127 0.037148330
    [197,] 0.11881431 0.024395029
    [198,] 0.13844703 0.114910219
    [199,] 0.13827671 0.054376296
    [200,] 0.08632633 0.049064072
    [201,] 0.15284898 0.132365907
    [202,] 0.01798020 0.094991151
    [203,] 0.16232411 0.165244367
    [204,] 0.04837684 0.099663108
    [205,] 0.15518273 0.009033563
    [206,] 0.20949843 0.077769520
    [207,] 0.03107413 0.052367019
    [208,] 0.02035351 0.067190724
    [209,] 0.09963606 0.100378695
    [210,] 0.19275661 0.065212405
    [211,] 0.24225778 0.205765753
    [212,] 0.20004273 0.057244532
    [213,] 0.13847434 0.045051814
    [214,] 0.15773621 0.090245110
    [215,] 0.15047953 0.147590617
    [216,] 0.16487107 0.059836509
    [217,] 0.12665757 0.050683000
    [218,] 0.09094949 0.002510419
    [219,] 0.10542031 0.005130113
    [220,] 0.22753096 0.045561721
    [221,] 0.03580284 0.128345920
    [222,] 0.11835213 0.016904916
    [223,] 0.17028327 0.128150422
    [224,] 0.05732713 0.031236479
    [225,] 0.23400458 0.094380354
    [226,] 0.06259037 0.071035435
    [227,] 0.20540243 0.186561132
    [228,] 0.19764660 0.086970126
    [229,] 0.33535238 0.192990530
    [230,] 0.08367035 0.035087710
    [231,] 0.13871052 0.020411380
    [232,] 0.19342992 0.158416161
    [233,] 0.08127093 0.033472741
    [234,] 0.09843269 0.019492082
    [235,] 0.11286078 0.027784772
    [236,] 0.16429454 0.023088234
    [237,] 0.09617821 0.039965507
    [238,] 0.06805881 0.117448824
    [239,] 0.08037463 0.080497212
    [240,] 0.17147992 0.043923602
    [241,] 0.12419949 0.087096225
    [242,] 0.25589770 0.084563468
    [243,] 0.04566666 0.048433380
    [244,] 0.06386474 0.134554686
    [245,] 0.06457913 0.063314577
    [246,] 0.15993681 0.110859456
    [247,] 0.20280060 0.098053113
    [248,] 0.22316079 0.109198624
    [249,] 0.17570534 0.021499280
    [250,] 0.23282081 0.042906875
    [251,] 0.18372158 0.029512133
    [252,] 0.08197035 0.004310859
    [253,] 0.18710986 0.002739692
    [254,] 0.09702514 0.009427826
    [255,] 0.15432835 0.140245214
    [256,] 0.01098199 0.054770323
    [257,] 0.13039017 0.080334359
    [258,] 0.12548763 0.034325520
    [259,] 0.10109224 0.146139569
    [260,] 0.04649526 0.024862637
    [261,] 0.16522878 0.065266325
    [262,] 0.12637550 0.093174491
    [263,] 0.13457666 0.073536356
    [264,] 0.05523761 0.026375059
    [265,] 0.14113050 0.051976206
    [266,] 0.12766147 0.168166906
    [267,] 0.27497426 0.266434373
    [268,] 0.04864588 0.119077483
    [269,] 0.12446117 0.049617293
    [270,] 0.12390285 0.122423243
    [271,] 0.23296061 0.005926100
    [272,] 0.11308057 0.045735665
    [273,] 0.13606342 0.057557159
    [274,] 0.10269649 0.214493468
    [275,] 0.03080049 0.140656456
    [276,] 0.23752947 0.009794730
    [277,] 0.14345693 0.050528245
    [278,] 0.23357372 0.095965176
    [279,] 0.22693403 0.059573940
    [280,] 0.23064336 0.075155320
    [281,] 0.15326812 0.147275806
    [282,] 0.15736608 0.090018798
    [283,] 0.13990319 0.098139716
    [284,] 0.11676888 0.010110723
    [285,] 0.07745759 0.199327402
    [286,] 0.11578653 0.035865868
    [287,] 0.03181520 0.091280904
    [288,] 0.09658778 0.003368199
    [289,] 0.04742548 0.135282489
    [290,] 0.15516087 0.013135380
    [291,] 0.09233453 0.025960183
    [292,] 0.10461279 0.120716314
    [293,] 0.04651052 0.247655421
    [294,] 0.24185235 0.192352044
    [295,] 0.13902811 0.000447089
    [296,] 0.09860272 0.029912667
    [297,] 0.15658897 0.032597891
    [298,] 0.04125473 0.022361316
    [299,] 0.15582840 0.067618704
    [300,] 0.03818380 0.070778678
    [301,] 0.00417081 0.059067462
    [302,] 0.13935628 0.056633007
    [303,] 0.03264681 0.030042060
    [304,] 0.21883508 0.112290073
    [305,] 0.15970375 0.050920015
    [306,] 0.20118135 0.076743964
    [307,] 0.37954352 0.019106960
    [308,] 0.08132078 0.223166097
    [309,] 0.10764676 0.125319600
    [310,] 0.13416729 0.039931928
    [311,] 0.14020107 0.044156763
    [312,] 0.30262001 0.051785358
    [313,] 0.11034227 0.061554601
    [314,] 0.14927467 0.022172091
    [315,] 0.00683335 0.063488932
    [316,] 0.15723938 0.027677357
    [317,] 0.18217221 0.029984942
    [318,] 0.04585920 0.043608794
    [319,] 0.20479619 0.015475042
    [320,] 0.01916224 0.101021718
    [321,] 0.17108845 0.090072004
    [322,] 0.10195985 0.094742209
    [323,] 0.11034521 0.084848090
    [324,] 0.19674468 0.087521706
    [325,] 0.15261819 0.041568978
    [326,] 0.08955056 0.042288163
    [327,] 0.16087288 0.107762413
    [328,] 0.12897906 0.151649283
    [329,] 0.23504992 0.036320260
    [330,] 0.06343663 0.071456572
    [331,] 0.19377065 0.005604410
    [332,] 0.12563974 0.033147233
    [333,] 0.19456094 0.063654611
    [334,] 0.25979232 0.018981873
    [335,] 0.08097542 0.078387718
    [336,] 0.18885130 0.028039220
    [337,] 0.06286814 0.113354230
    [338,] 0.18656612 0.015405107
    [339,] 0.13154702 0.089911215
    [340,] 0.21055667 0.086631879
    [341,] 0.23053541 0.042533662
    [342,] 0.09544894 0.068344347
    [343,] 0.00514321 0.176650348
    [344,] 0.07806794 0.115978405
    [345,] 0.18547438 0.023611917
    [346,] 0.09518530 0.018674572
    [347,] 0.00791384 0.025673876
    [348,] 0.03254651 0.095440018
    [349,] 0.23914111 0.104960076
    [350,] 0.02174378 0.127351477
    [351,] 0.13040958 0.082524688
    [352,] 0.17162183 0.037012334
    [353,] 0.14759813 0.104660405
    [354,] 0.09187011 0.136057693
    [355,] 0.25081071 0.218201535
    [356,] 0.07999060 0.072421693
    [357,] 0.08894697 0.076428618
    [358,] 0.15239893 0.208482571
    [359,] 0.01126074 0.047400588
    [360,] 0.11422568 0.022059870
    [361,] 0.04456333 0.096210477
    [362,] 0.18807662 0.130795563
    [363,] 0.20247112 0.089455601
    [364,] 0.14544622 0.178645259
    [365,] 0.17631443 0.056311150
    [366,] 0.07259146 0.047767926
    [367,] 0.21081733 0.060586947
    [368,] 0.11073042 0.156879746
    [369,] 0.24261945 0.035908773
    [370,] 0.08222834 0.123870571
    [371,] 0.08245102 0.060117144
    [372,] 0.18750045 0.067539714
    [373,] 0.05039849 0.012973466
    [374,] 0.19407651 0.069452671
    [375,] 0.08409456 0.057889848
    [376,] 0.09833801 0.066441713
    [377,] 0.11929054 0.276944660
    [378,] 0.11620991 0.032967872
    [379,] 0.14755434 0.066946677
    [380,] 0.16881936 0.088050553
    [381,] 0.23819508 0.032010096
    [382,] 0.23595695 0.075371359
    [383,] 0.14464593 0.271976591
    [384,] 0.15853871 0.013953025
    [385,] 0.12708793 0.147421634
    [386,] 0.13893242 0.125783403
    [387,] 0.15664159 0.118074291
    [388,] 0.20992966 0.153207106
    [389,] 0.18160355 0.079057950
    [390,] 0.16307288 0.011256476
    [391,] 0.14359529 0.013619337
    [392,] 0.24110040 0.034651916
    [393,] 0.16626428 0.048892396
    [394,] 0.10205319 0.099258363
    [395,] 0.06088411 0.035842970
    [396,] 0.10840579 0.203783749
    [397,] 0.13980727 0.003350112
    [398,] 0.04648763 0.032134171
    [399,] 0.21370990 0.045924432
    [400,] 0.11750905 0.026515413
    [401,] 0.19153900 0.198443131
    [402,] 0.08199155 0.042970971
    [403,] 0.14659156 0.076803816
    [404,] 0.22581549 0.054858141
    [405,] 0.08239716 0.109835575
    [406,] 0.14739277 0.059589195
    [407,] 0.25437277 0.103670244
    [408,] 0.00291913 0.184927836
    [409,] 0.18872271 0.057572884
    [410,] 0.15815704 0.144738059
    [411,] 0.11170943 0.001346744
    [412,] 0.12564205 0.066422064
    [413,] 0.13530559 0.021752739
    [414,] 0.07219475 0.015012381
    [415,] 0.13165030 0.031269504
    [416,] 0.07658338 0.076269540
    [417,] 0.09159191 0.065369228
    [418,] 0.06853253 0.006766776
    [419,] 0.18000586 0.129773360
    [420,] 0.24209175 0.078932803
    [421,] 0.32452587 0.110194182
    [422,] 0.04563953 0.133182030
    [423,] 0.22549290 0.091552071
    [424,] 0.16518673 0.115018196
    [425,] 0.20610802 0.013412600
    [426,] 0.06511286 0.031141002
    [427,] 0.12364687 0.036092600
    [428,] 0.08377563 0.063712015
    [429,] 0.22941583 0.102289186
    [430,] 0.21158162 0.122061757
    [431,] 0.05752441 0.065304365
    [432,] 0.13216758 0.100712634
    [433,] 0.10671621 0.029547164
    [434,] 0.13939951 0.088337978
    [435,] 0.06074397 0.107741550
    [436,] 0.14042305 0.115897115
    [437,] 0.10754061 0.032418856
    [438,] 0.10173080 0.074336248
    [439,] 0.15032692 0.093549338
    [440,] 0.10545119 0.068893825
    [441,] 0.03625856 0.027912069
    [442,] 0.30752674 0.147060663
    [443,] 0.09987417 0.081383252
    [444,] 0.13480985 0.063926605
    [445,] 0.17820155 0.008665094
    [446,] 0.21252425 0.220349898
    [447,] 0.09055429 0.069453660
    [448,] 0.06077539 0.032862171
    [449,] 0.06616562 0.142211351
    [450,] 0.21431929 0.042986808
    [451,] 0.13398529 0.066358871
    [452,] 0.22760803 0.050934224
    [453,] 0.19015468 0.086143767
    [454,] 0.10456665 0.124105782
    [455,] 0.15234481 0.005316739
    [456,] 0.15915976 0.079465062
    [457,] 0.06728294 0.030574411
    [458,] 0.08167534 0.023025330
    [459,] 0.10819912 0.065458338
    [460,] 0.28515448 0.058733766
    [461,] 0.03210985 0.111136304
    [462,] 0.23294590 0.025645717
    [463,] 0.15865825 0.008118703
    [464,] 0.09212305 0.062892192
    [465,] 0.09805582 0.127846908
    [466,] 0.10720256 0.115537681
    [467,] 0.14216950 0.102790013
    [468,] 0.19745463 0.139869910
    [469,] 0.29287183 0.160768909
    [470,] 0.12746177 0.103784296
    [471,] 0.20254916 0.106535457
    [472,] 0.10365229 0.015413078
    [473,] 0.18381401 0.097945045
    [474,] 0.13291866 0.142627284
    [475,] 0.10151066 0.092277466
    [476,] 0.13307860 0.053391380
    [477,] 0.14256422 0.006895523
    [478,] 0.23065419 0.034752851
    [479,] 0.06278451 0.129441961
    [480,] 0.04765861 0.091227638
    [481,] 0.09453539 0.184537024
    [482,] 0.20423990 0.049599249
    [483,] 0.03748019 0.106717631
    [484,] 0.08172297 0.015328617
    [485,] 0.17892826 0.126007797
    [486,] 0.10442726 0.133419395
    [487,] 0.10242888 0.181333380
    [488,] 0.09083132 0.023393175
    [489,] 0.12804512 0.041020813
    [490,] 0.05179482 0.043115710
    [491,] 0.12621336 0.058962287
    [492,] 0.06029942 0.045371124
    [493,] 0.08710895 0.087909640
    [494,] 0.21655229 0.100650744
    [495,] 0.08796855 0.037639561
    [496,] 0.28374837 0.117412160
    [497,] 0.17757079 0.063829295
    [498,] 0.04651806 0.045688853
    [499,] 0.09657332 0.034401765
    [500,] 0.08579913 0.012017292
 [ reached getOption("max.print") -- omitted 7500 rows ]
# Blank canvas with fixed [0, 1] axes; type = "n" suppresses the dummy point
plot(1, 1, type = "n", xlim = c(0, 1), ylim = c(0, 1),
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")

# Posterior draws of x_rand in black, observed values in red
points(mlowreg_rand[, 1], mlowreg_rand[, 2], pch = 16, col = "black")
points(reg_temp_low, pch = 16, col = "red")

# Concentration ellipses over the posterior draws (inner and outer levels)
dataEllipse(mlowreg_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(mlowreg_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")


print('---------------------------- First Pass Regression < 0.3 no 0s --------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3 no 0s --------------------------------------------"
# Posterior draws of x_rand from the no-zeros FPReg (< 0.3) fit
mlowreg_rand_drop0 <- extract(fit_mreg_low_drop0, "x_rand")[[1]]

# Blank canvas with fixed [0, 1] axes; type = "n" suppresses the dummy point
plot(1, 1, type = "n", xlim = c(0, 1), ylim = c(0, 1),
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")

# Posterior draws in black, observed values in red
points(mlowreg_rand_drop0[, 1], mlowreg_rand_drop0[, 2], pch = 16, col = "black")
points(reg_temp_low_drop0, pch = 16, col = "red")

# Concentration ellipses over the posterior draws
dataEllipse(mlowreg_rand_drop0, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(mlowreg_rand_drop0, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")


print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3 --------------------------------------------"
# Posterior draws of x_rand from the FPReg (>= 0.3) fit; thin to 900 random
# draws so the scatter stays readable
mhighreg_rand_samples <- extract(fit_mreg_high, "x_rand")[[1]]
keep_rows <- sample(seq_len(nrow(mhighreg_rand_samples)), 900)
mhighreg_rand <- mhighreg_rand_samples[keep_rows, ]

# Blank canvas with fixed [0, 1] axes; type = "n" suppresses the dummy point
plot(1, 1, type = "n", xlim = c(0, 1), ylim = c(0, 1),
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")

# Posterior draws in black, observed values in red
points(mhighreg_rand[, 1], mhighreg_rand[, 2], pch = 16, col = "black")
points(reg_temp_high, pch = 16, col = "red")

# Concentration ellipses over the posterior draws
dataEllipse(mhighreg_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(mhighreg_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")


# First Pass Regression >= 0.3, zeros dropped.
# Thin the posterior x_rand draws to 900 for plotting
mhighreg_rand_drop0_samples <- extract(fit_mreg_high_drop0, "x_rand")[[1]]
keep_rows <- sample(seq_len(nrow(mhighreg_rand_drop0_samples)), 900)
mhighreg_rand_drop0 <- mhighreg_rand_drop0_samples[keep_rows, ]

# Blank canvas with fixed [0, 1] axes; type = "n" suppresses the dummy point
plot(1, 1, type = "n", xlim = c(0, 1), ylim = c(0, 1),
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")

# Posterior draws in black, observed values in red
points(mhighreg_rand_drop0[, 1], mhighreg_rand_drop0[, 2], pch = 16, col = "black")
points(reg_temp_high_drop0, pch = 16, col = "red")

# Concentration ellipses over the posterior draws
dataEllipse(mhighreg_rand_drop0, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(mhighreg_rand_drop0, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")

Model eye-tracking to eye-tracking correlation


print("Gaze Duration")
[1] "Gaze Duration"
# View(provo_eyetr_grouped_df)

# Gaze duration: widen the grouped data to one column per eye-tracking half
# (value_1 / value_2) and floor both at 1 so downstream positive-support
# models never see a 0.
# NOTE(review): assumes `provo_eyetr_grouped_df` carries columns `metric`,
# `measure`, `value` — confirm against the chunk that builds it.
egd_df = provo_eyetr_grouped_df %>% filter(metric == "gaze_duration") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
# Frequentist split-half correlation, as a reference point for the Bayesian rho
print(cor.test(egd_df$eyetr_value_1, egd_df$eyetr_value_2)$estimate)
   cor 
0.9146 
# View(egd_df)

# Density of each half's smoothed gaze-duration values, one panel per measure
ggplot(gather(egd_df, measure, value, 5:6), aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Two-column numeric matrix fed to the Stan model
egd_temp <- data.matrix(egd_df[c("eyetr_value_1", "eyetr_value_2")])

# Side-by-side plotting region
par(mfrow = c(1, 2))

# Raw (untransformed) scatter of the two halves
plot(egd_temp, pch = 16, col = "blue", main = "Not Log-Transformed")

# Data list handed to Stan
egd_data = list(x = egd_temp, N = nrow(egd_temp))

# Fit the bivariate correlation model to the gaze-duration pairs
fit_egd = stan(
  file = "stan_models/bivariate_correlation.stan",
  data = egd_data,
  iter = 4000,
  chains = 4,
  cores = 8,
  seed = 444,
  # control = list(adapt_delta = 0.99),
  verbose = FALSE
)

# Strip the compiled dynamic shared object before serialising, then cache
fit_egd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egd, file = "./bayesian_models_cor/eyetr_eyetr_gaze_duration_cor.rds")
print("Go Past Time")
[1] "Go Past Time"
# Go-past time: widen to one column per eye-tracking half (value_1 / value_2)
# and floor both at 1 so downstream positive-support models never see a 0.
egpt_df = provo_eyetr_grouped_df %>% filter(metric == "go_past_time") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
# Frequentist split-half correlation, as a reference point for the Bayesian rho
print(cor.test(egpt_df$eyetr_value_1, egpt_df$eyetr_value_2)$estimate)
   cor 
0.9134 
# View(egd_df)

# Density of each half's smoothed go-past-time values, one panel per measure
ggplot(gather(egpt_df, measure, value, 5:6), aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Two-column numeric matrix fed to the Stan model
egpt_temp <- data.matrix(egpt_df[c("eyetr_value_1", "eyetr_value_2")])

# Side-by-side plotting region
par(mfrow = c(1, 2))

# Raw (untransformed) scatter of the two halves
plot(egpt_temp, pch = 16, col = "blue", main = "Not Log-Transformed")

# Data list handed to Stan
egpt_data = list(x = egpt_temp, N = nrow(egpt_temp))

# Fit the bivariate correlation model to the go-past-time pairs
fit_egpt = stan(
  file = "stan_models/bivariate_correlation.stan",
  data = egpt_data,
  iter = 4000,
  chains = 4,
  cores = 8,
  seed = 444,
  # control = list(adapt_delta = 0.99),
  verbose = FALSE
)

# Strip the compiled dynamic shared object before serialising, then cache
fit_egpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egpt, file = "./bayesian_models_cor/eyetr_eyetr_go_past_time_cor.rds")
print("Total Duration")
[1] "Total Duration"
# Total duration: widen to one column per eye-tracking half (value_1 / value_2)
# and floor both at 1 so downstream positive-support models never see a 0.
etd_df = provo_eyetr_grouped_df %>% filter(metric == "total_duration") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
# Frequentist split-half correlation, as a reference point for the Bayesian rho
print(cor.test(etd_df$eyetr_value_1, etd_df$eyetr_value_2)$estimate)
   cor 
0.9272 
# View(egd_df)

# Density of each half's smoothed total-duration values, one panel per measure
ggplot(gather(etd_df, measure, value, 5:6), aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Two-column numeric matrix fed to the Stan model
etd_temp <- data.matrix(etd_df[c("eyetr_value_1", "eyetr_value_2")])

# Side-by-side plotting region
par(mfrow = c(1, 2))

# Raw (untransformed) scatter of the two halves
plot(etd_temp, pch = 16, col = "blue", main = "Total Duration Not Log-Transformed")

# Data list handed to Stan
etd_data = list(x = etd_temp, N = nrow(etd_temp))

# Fit the bivariate correlation model to the total-duration pairs.
# FIX: the original commented out BOTH `control=` and `verbose=`, leaving a
# dangling comma after `seed=444,` (an empty trailing argument). Restored
# `verbose = FALSE` to match the sibling fit_egd / fit_egpt / fit_ereg calls.
fit_etd = stan(
  file = "stan_models/bivariate_correlation.stan",
  data = etd_data,
  iter = 4000,
  chains = 4,
  cores = 8,
  seed = 444,
  # control = list(adapt_delta = 0.99),
  verbose = FALSE
)

# Strip the compiled dynamic shared object before serialising, then cache
fit_etd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_etd, file = "./bayesian_models_cor/eyetr_eyetr_total_duration_cor.rds")
print("First Pass Regression Prob.")  # FIX: typo "Fisrt" -> "First"
[1] "First Pass Regression Prob."
# First-pass regression probability: widen to one column per eye-tracking half
# and floor at 1e-5 (probabilities, so a much smaller smoothing floor than the
# duration measures).
# NOTE(review): rows 443-446 are dropped by hard-coded position after
# distinct() — fragile if the upstream data changes; confirm why these rows
# are excluded and consider filtering on a key instead.
ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5)
  )
# Frequentist split-half correlation, as a reference point for the Bayesian rho
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)
  cor 
0.741 
# View(egd_df)

# Density of each half's smoothed regression probabilities, one panel per measure
ggplot(gather(ereg_df, measure, value, 5:6), aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Complete-case two-column numeric matrix fed to the Stan model
ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()

# Side-by-side plotting region
par(mfrow = c(1, 2))

# Raw (untransformed) scatter of the two halves
plot(ereg_temp, pch = 16, col = "blue", main = "FPReg Not Log-Transformed")

# ------- fit model FPReg ----------
# Data list handed to Stan
ereg_data = list(x = ereg_temp, N = nrow(ereg_temp))
# Fit the regression-probability model. Note this uses a different Stan file
# (bivariate_normal_reg.stan) than the duration measures
# (bivariate_correlation.stan).
fit_ereg = stan(
  file="stan_models/bivariate_normal_reg.stan", 
  data=ereg_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99),
  verbose = FALSE
  )

# Save the model 
# NOTE(review): saved as "..._FPReg_cor5.rds", but the load step below reads
# "..._FPReg_cor.rds" — confirm which file is the intended artifact.
fit_ereg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_ereg, file = paste0("./bayesian_models_cor/eyetr_eyetr_FPReg_cor5.rds"))
# Reload the cached fits (DSO was stripped before saving, so these objects
# can be summarised but not re-sampled without recompiling).
fit_egd = readRDS("./bayesian_models_cor/eyetr_eyetr_gaze_duration_cor.rds")
fit_egpt = readRDS("./bayesian_models_cor/eyetr_eyetr_go_past_time_cor.rds")
fit_etd = readRDS("./bayesian_models_cor/eyetr_eyetr_total_duration_cor.rds")
# NOTE(review): loads "..._FPReg_cor.rds" while the fitting chunk saved
# "..._FPReg_cor5.rds" — confirm this is the intended (possibly earlier) fit.
fit_ereg = readRDS("./bayesian_models_cor/eyetr_eyetr_FPReg_cor.rds")
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
print(fit_egd)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           128.59    0.02   1.11    126.37    127.86    128.61    129.35    130.72  4725    1
mu[2]           118.86    0.01   0.98    116.88    118.20    118.87    119.51    120.76  4981    1
sigma[1]         80.51    0.01   0.90     78.74     79.90     80.52     81.12     82.25  4570    1
sigma[2]         71.27    0.01   0.79     69.71     70.73     71.27     71.80     72.81  4737    1
nu               25.93    0.07   4.93     18.41     22.47     25.17     28.62     37.53  5323    1
rho               0.93    0.00   0.00      0.93      0.93      0.93      0.93      0.94  5350    1
cov[1,1]       6483.12    2.15 145.27   6200.11   6384.78   6483.59   6579.87   6765.15  4571    1
cov[1,2]       5343.14    1.82 117.99   5110.89   5262.31   5342.62   5422.39   5572.98  4180    1
cov[2,1]       5343.14    1.82 117.99   5110.89   5262.31   5342.62   5422.39   5572.98  4180    1
cov[2,2]       5079.72    1.65 113.15   4859.11   5002.72   5079.35   5155.69   5301.71  4730    1
x_rand[1]       142.08    0.85  72.88     19.00     88.95    136.40    189.59    301.58  7381    1
x_rand[2]       130.48    0.76  65.14     20.87     82.14    126.27    171.93    269.53  7385    1
attempt           0.07    0.00   0.28      0.00      0.00      0.00      0.00      1.00  8017    1
max_attempts     10.00     NaN   0.00     10.00     10.00     10.00     10.00     10.00   NaN  NaN
lp__         -53400.86    0.03   1.74 -53405.16 -53401.77 -53400.53 -53399.58 -53398.45  3567    1

Samples were drawn using NUTS(diag_e) at Sat Jul 22 21:28:08 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
print(fit_egpt)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           151.87    0.02   1.54    148.90    150.83    151.88    152.91    154.87  4316    1
mu[2]           139.50    0.02   1.35    136.85    138.58    139.50    140.40    142.16  4309    1
sigma[1]        104.23    0.02   1.38    101.57    103.29    104.23    105.15    106.96  4517    1
sigma[2]         91.29    0.02   1.18     89.01     90.49     91.28     92.10     93.67  4238    1
nu                9.84    0.01   0.97      8.17      9.16      9.77     10.43     12.02  4294    1
rho               0.93    0.00   0.00      0.93      0.93      0.93      0.94      0.94  5682    1
cov[1,1]      10866.34    4.27 286.82  10316.57  10668.36  10863.38  11057.52  11440.23  4514    1
cov[1,2]       8897.70    3.58 230.96   8458.01   8739.41   8893.80   9052.34   9359.86  4154    1
cov[2,1]       8897.70    3.58 230.96   8458.01   8739.41   8893.80   9052.34   9359.86  4154    1
cov[2,2]       8335.83    3.32 215.96   7923.28   8187.73   8331.15   8483.17   8774.46  4239    1
x_rand[1]       171.31    1.07  96.66     19.27    101.02    159.61    228.57    389.14  8093    1
x_rand[2]       156.09    0.95  85.13     18.99     93.98    147.13    208.02    344.02  7994    1
attempt           0.12    0.00   0.37      0.00      0.00      0.00      0.00      1.00  7345    1
max_attempts     10.00     NaN   0.00     10.00     10.00     10.00     10.00     10.00   NaN  NaN
lp__         -55541.43    0.03   1.77 -55545.72 -55542.36 -55541.11 -55540.13 -55539.02  3362    1

Samples were drawn using NUTS(diag_e) at Sat Jul 22 21:52:18 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
print(fit_etd)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           182.46    0.02   1.40    179.71    181.51    182.43    183.42    185.23  4475    1
mu[2]           163.16    0.02   1.24    160.74    162.32    163.15    164.00    165.56  4552    1
sigma[1]        102.05    0.02   1.18     99.76    101.25    102.05    102.84    104.35  4659    1
sigma[2]         90.80    0.01   1.04     88.77     90.11     90.80     91.49     92.86  4859    1
nu               17.08    0.04   2.46     13.14     15.32     16.80     18.44     22.84  4836    1
rho               0.94    0.00   0.00      0.94      0.94      0.94      0.94      0.95  4836    1
cov[1,1]      10414.71    3.51 240.12   9951.10  10251.81  10414.32  10575.72  10888.84  4667    1
cov[1,2]       8723.92    2.96 197.89   8344.44   8591.58   8725.13   8854.22   9120.77  4456    1
cov[2,1]       8723.92    2.96 197.89   8344.44   8591.58   8725.13   8854.22   9120.77  4456    1
cov[2,2]       8245.86    2.71 189.17   7879.76   8119.40   8245.31   8370.73   8622.71  4862    1
x_rand[1]       197.26    1.10  96.53     32.70    128.21    189.92    257.81    404.81  7760    1
x_rand[2]       176.38    0.99  86.54     30.04    113.27    170.41    230.58    357.63  7701    1
attempt           0.06    0.00   0.25      0.00      0.00      0.00      0.00      1.00  7873    1
max_attempts     10.00     NaN   0.00     10.00     10.00     10.00     10.00     10.00   NaN  NaN
lp__         -55910.26    0.03   1.76 -55914.61 -55911.19 -55909.93 -55908.98 -55907.84  3286    1

Samples were drawn using NUTS(diag_e) at Sat Jul 22 22:21:40 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.--------------------------------------------"
print(fit_ereg)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                 mean se_mean   sd     2.5%      25%      50%      75%    97.5% n_eff Rhat
mu[1]            0.11    0.00 0.00     0.10     0.10     0.11     0.11     0.11  7352    1
mu[2]            0.11    0.00 0.00     0.10     0.11     0.11     0.11     0.11  6993    1
sigma[1]         0.11    0.00 0.00     0.10     0.11     0.11     0.11     0.11  5584    1
sigma[2]         0.10    0.00 0.00     0.10     0.10     0.10     0.10     0.11  5441    1
nu               3.16    0.00 0.13     2.92     3.07     3.16     3.24     3.42  6221    1
rho              0.73    0.00 0.01     0.71     0.73     0.73     0.74     0.76  7209    1
cov[1,1]         0.01    0.00 0.00     0.01     0.01     0.01     0.01     0.01  5581    1
cov[1,2]         0.01    0.00 0.00     0.01     0.01     0.01     0.01     0.01  4877    1
cov[2,1]         0.01    0.00 0.00     0.01     0.01     0.01     0.01     0.01  4877    1
cov[2,2]         0.01    0.00 0.00     0.01     0.01     0.01     0.01     0.01  5442    1
x_rand[1]        0.17    0.00 0.14     0.01     0.08     0.14     0.22     0.47  7543    1
x_rand[2]        0.17    0.00 0.14     0.01     0.08     0.14     0.21     0.46  8201    1
attempt          0.37    0.01 0.70     0.00     0.00     0.00     1.00     2.00  7941    1
max_attempts    10.00     NaN 0.00    10.00    10.00    10.00    10.00    10.00   NaN  NaN
lp__         11696.50    0.03 1.76 11692.31 11695.54 11696.82 11697.81 11698.90  3562    1

Samples were drawn using NUTS(diag_e) at Sun Jul 23 01:21:18 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
# MCMC diagnostics for each fit: trace plots (mixing), per-chain densities
# (agreement across chains), and interval plots of the posteriors.
# Kept as explicit top-level calls — wrapping in a loop would suppress
# ggplot auto-printing in the knitted document.
# stan_trace(fit_egd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_egd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_egd, pars=c("rho", "mu", "sigma", "nu"))

# Gaze Duration
stan_trace(fit_egd)
stan_dens(fit_egd, separate_chains = TRUE)
stan_plot(fit_egd)

# Go Past Time
stan_trace(fit_egpt)
stan_dens(fit_egpt, separate_chains = TRUE)
stan_plot(fit_egpt)

# Total Duration
stan_trace(fit_etd)
stan_dens(fit_etd, separate_chains = TRUE)
stan_plot(fit_etd)

# FPReg
stan_trace(fit_ereg)
stan_dens(fit_ereg, separate_chains = TRUE)
stan_plot(fit_ereg)
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
# Posterior summary of rho (gaze duration): mean, 95% HPD, 95% equal-tailed CrI
rho_egd = as.numeric(extract(fit_egd, "rho")[[1]])
rho_mean = mean(rho_egd)                            # renamed from `mean`: don't shadow base::mean
crI = quantile(rho_egd, c(.025, .975))              # equal-tailed 95% credible interval
hpd95 = HPDinterval(as.mcmc(rho_egd), prob=0.95)    # renamed from `hpd99`: prob is 0.95, not 0.99
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")
Mean: 0.9311
HPD: [0.9263, 0.9359]
crI: [0.9262, 0.9358]
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
# Posterior summary of rho (go-past time): mean, 95% HPD, 95% equal-tailed CrI
rho_egpt = as.numeric(extract(fit_egpt, "rho")[[1]])
rho_mean = mean(rho_egpt)                           # renamed from `mean`: don't shadow base::mean
crI = quantile(rho_egpt, c(.025, .975))             # equal-tailed 95% credible interval
hpd95 = HPDinterval(as.mcmc(rho_egpt), prob=0.95)   # renamed from `hpd99`: prob is 0.95, not 0.99
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")
Mean: 0.9349
HPD: [0.9297, 0.9395]
crI: [0.9298, 0.9397]
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
# Posterior summary of rho (total duration): mean, 95% HPD, 95% equal-tailed CrI
rho_etd = as.numeric(extract(fit_etd, "rho")[[1]])
rho_mean = mean(rho_etd)                            # renamed from `mean`: don't shadow base::mean
crI = quantile(rho_etd, c(.025, .975))              # equal-tailed 95% credible interval
hpd95 = HPDinterval(as.mcmc(rho_etd), prob=0.95)    # renamed from `hpd99`: prob is 0.95, not 0.99
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")
Mean: 0.9414
HPD: [0.9372, 0.9457]
crI: [0.937, 0.9456]
print('---------------------------- First Pass Regression --------------------------------------------')
[1] "---------------------------- First Pass Regression --------------------------------------------"
rho_ereg = as.numeric(extract(fit_ereg, "rho")[[1]])
mean = mean(rho_ereg)
crI = quantile(rho_ereg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
Mean: 0.7339
HPD: [0.7128, 0.7557]
crI: [0.7119, 0.755]
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
# Posterior-predictive draws of (value_1, value_2) pairs for gaze duration
egd_rand <- rstan::extract(fit_egd, "x_rand")[[1]]
# x_rand_filtered <- x_rand[apply(x_rand, 1, function(x) all(x > 0)),]
# x_rand_filtered

# Blank canvas with fixed limits; type = "n" suppresses drawing
plot(1, 1, xlim = c(0, 400), ylim = c(0, 700), type = "n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Gaze Duration")

# Simulated draws in black, observed data (egd_temp) in red
points(egd_rand[, 1], egd_rand[, 2], col = "black", pch = 16)
points(egd_temp, pch = 16, col = "red")

# 50/75% and 95/99% concentration ellipses over the simulated draws
dataEllipse(egd_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(egd_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")


print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
# Posterior-predictive draws of (value_1, value_2) pairs for go-past time
egpt_rand <- rstan::extract(fit_egpt, "x_rand")[[1]]

# Blank canvas with fixed limits; type = "n" suppresses drawing
plot(1, 1, xlim = c(0, 800), ylim = c(0, 1200), type = "n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Go Past Time")

# Simulated draws in black, observed data (egpt_temp) in red
points(egpt_rand[, 1], egpt_rand[, 2], col = "black", pch = 16)
points(egpt_temp, pch = 16, col = "red")

# 50/75% and 95/99% concentration ellipses over the simulated draws
dataEllipse(egpt_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(egpt_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")


print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
# Posterior-predictive draws of (value_1, value_2) pairs for total duration
etd_rand <- rstan::extract(fit_etd, "x_rand")[[1]]

# Blank canvas with fixed limits; type = "n" suppresses drawing
plot(1, 1, xlim = c(0, 800), ylim = c(0, 1200), type = "n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Total Duration")

# Simulated draws in black, observed data (etd_temp) in red
points(etd_rand[, 1], etd_rand[, 2], col = "black", pch = 16)
points(etd_temp, pch = 16, col = "red")

# 50/75% and 95/99% concentration ellipses over the simulated draws
dataEllipse(etd_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(etd_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")


print('---------------------------- First Pass Regression --------------------------------------------')
[1] "---------------------------- First Pass Regression --------------------------------------------"
# Posterior-predictive draws for first-pass regression probabilities (unit square)
ereg_rand <- rstan::extract(fit_ereg, "x_rand")[[1]]

# Blank canvas with fixed limits; type = "n" suppresses drawing
plot(1, 1, xlim = c(0, 1), ylim = c(0, 1), type = "n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "First Pass Regression")

# Simulated draws in black, observed data (ereg_temp) in red
points(ereg_rand[, 1], ereg_rand[, 2], col = "black", pch = 16)
points(ereg_temp, pch = 16, col = "red")

# 50/75% and 95/99% concentration ellipses over the simulated draws
dataEllipse(ereg_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(ereg_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")

Bayesian – correlation between MoTR and word level statistics

print("Log Frequency")
[1] "Log Frequency"
# Frequentist check first: Pearson correlations of MoTR and eye-tracking
# gaze durations with log word frequency.
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$freq)$estimate)
    cor 
-0.7454 
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$freq)$estimate)
    cor 
-0.8069 
# View(stats_cor_df)
# Marginal densities of the two variables entering the Bayesian model.
# NOTE(review): columns are selected by position (7 and 13) -- presumably
# freq and a reading-time column; confirm against names(stats_cor_df).
# gather() is superseded by tidyr::pivot_longer().
stats_cor_df %>% 
  gather(measure, value, c(7, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Two-column numeric matrices for the Stan bivariate correlation model
mfreq_temp <- stats_cor_df[c("motr_value", "freq")] %>%
  data.matrix()
efreq_temp <- stats_cor_df[c("eyetr_value", "freq")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Scatter: MoTR reading times vs. log word frequency
plot(mfreq_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Word Frequency")

# Scatter: eye-tracking reading times vs. log word frequency
plot(efreq_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Word Frequency")

MoTR & frequency

# Fit the Bayesian bivariate correlation model to (MoTR RT, log frequency)
mfreq_data = list(x = mfreq_temp, N = nrow(mfreq_temp))

fit_mfreq = stan(
  file = "stan_models/stats_correlation.stan",
  data = mfreq_data,
  iter = 4000,
  chains = 4,
  cores = 8,
  seed = 444  # trailing comma removed: it passed an empty argument to stan()
  # control = list(adapt_delta = 0.99),  # enable if divergent transitions appear
)

# Strip the compiled DSO before saving so the serialized fit stays small
# and portable across machines; the model recompiles if re-run.
fit_mfreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mfreq, file = "./bayesian_models_cor/motr_freq_cor.rds")  # single-arg paste0() removed

EyeTR & frequency

# Fit the Bayesian bivariate correlation model to (EyeTR RT, log frequency)
efreq_data = list(x = efreq_temp, N = nrow(efreq_temp))

fit_efreq = stan(
  file = "stan_models/stats_correlation.stan",
  data = efreq_data,
  iter = 4000,
  chains = 4,
  cores = 8,
  seed = 444  # trailing comma removed: it passed an empty argument to stan()
  # control = list(adapt_delta = 0.99),  # enable if divergent transitions appear
)

# Strip the compiled DSO before saving so the serialized fit stays portable
fit_efreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_efreq, file = "./bayesian_models_cor/eyetr_freq_cor.rds")  # single-arg paste0() removed
print("Length")
[1] "Length"
# Pearson correlations of MoTR and eye-tracking gaze durations with word length
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$len)$estimate)
   cor 
0.8644 
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$len)$estimate)
   cor 
0.8597 
# View(stats_cor_df)
# Marginal densities of the two variables entering the Bayesian model.
# NOTE(review): columns selected by position (9 and 13) -- presumably len
# and a reading-time column; confirm against names(stats_cor_df).
stats_cor_df %>% 
  gather(measure, value, c(9, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Two-column numeric matrices for the Stan bivariate correlation model
mlen_temp <- stats_cor_df[c("motr_value", "len")] %>%
  data.matrix()
elen_temp <- stats_cor_df[c("eyetr_value", "len")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Scatter: MoTR reading times vs. word length
plot(mlen_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Word Length")

# Scatter: eye-tracking reading times vs. word length
plot(elen_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Word Length")

MoTR & length

# Fit the Bayesian bivariate correlation model to (MoTR RT, word length);
# length uses the normal-marginal variant of the model.
mlen_data = list(x = mlen_temp, N = nrow(mlen_temp))

fit_mlen = stan(
  file = "stan_models/stats_correlation_len_normal.stan",
  data = mlen_data,
  iter = 4000,
  chains = 4,
  cores = 8,
  seed = 444  # trailing comma removed: it passed an empty argument to stan()
  # control = list(adapt_delta = 0.99),  # enable if divergent transitions appear
)

# Strip the compiled DSO before saving so the serialized fit stays portable.
# NOTE(review): this saves "motr_len_cor2.rds" but the reload chunk below
# reads "motr_len_cor.rds" -- confirm which file is the intended model.
fit_mlen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mlen, file = "./bayesian_models_cor/motr_len_cor2.rds")  # single-arg paste0() removed

EyeTR & length

# Fit the Bayesian bivariate correlation model to (EyeTR RT, word length)
elen_data = list(x = elen_temp, N = nrow(elen_temp))

fit_elen = stan(
  file = "stan_models/stats_correlation_len_normal.stan",
  data = elen_data,
  iter = 4000,
  chains = 4,
  cores = 8,
  seed = 444  # trailing comma removed: it passed an empty argument to stan()
  # control = list(adapt_delta = 0.99),  # enable if divergent transitions appear
)

# Strip the compiled DSO before saving so the serialized fit stays portable
fit_elen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_elen, file = "./bayesian_models_cor/eyetr_len_cor.rds")  # single-arg paste0() removed
print("Surprisal")
[1] "Surprisal"
# Pearson correlations of MoTR and eye-tracking gaze durations with surprisal
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$surp)$estimate)
   cor 
0.4978 
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$surp)$estimate)
   cor 
0.5683 
# View(stats_cor_df)
# Marginal densities of the two variables entering the Bayesian model.
# NOTE(review): columns selected by position (8 and 13) -- presumably surp
# and a reading-time column; confirm against names(stats_cor_df).
stats_cor_df %>% 
  gather(measure, value, c(8, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Two-column numeric matrices for the Stan bivariate correlation model
msurp_temp <- stats_cor_df[c("motr_value", "surp")] %>%
  data.matrix()
esurp_temp <- stats_cor_df[c("eyetr_value", "surp")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Scatter: MoTR reading times vs. surprisal
plot(msurp_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Surprisal")

# Scatter: eye-tracking reading times vs. surprisal
plot(esurp_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Surprisal")

MoTR & surprisal

# Fit the Bayesian bivariate correlation model to (MoTR RT, surprisal)
msurp_data = list(x = msurp_temp, N = nrow(msurp_temp))

fit_msurp = stan(
  file = "stan_models/stats_correlation.stan",
  data = msurp_data,
  iter = 4000,
  chains = 4,
  cores = 8,
  seed = 444  # trailing comma removed: it passed an empty argument to stan()
  # control = list(adapt_delta = 0.99),  # enable if divergent transitions appear
)

# Strip the compiled DSO before saving so the serialized fit stays portable
fit_msurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_msurp, file = "./bayesian_models_cor/motr_surp_cor.rds")  # single-arg paste0() removed

EyeTR & surprisal

# Fit the Bayesian bivariate correlation model to (EyeTR RT, surprisal)
esurp_data = list(x = esurp_temp, N = nrow(esurp_temp))

fit_esurp = stan(
  file = "stan_models/stats_correlation.stan",
  data = esurp_data,
  iter = 4000,
  chains = 4,
  cores = 8,
  seed = 444  # trailing comma removed: it passed an empty argument to stan()
  # control = list(adapt_delta = 0.99),  # enable if divergent transitions appear
)

# Strip the compiled DSO before saving so the serialized fit stays portable
fit_esurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_esurp, file = "./bayesian_models_cor/eyetr_surp_cor.rds")  # single-arg paste0() removed
# Reload the previously fitted word-statistic correlation models from disk
# so the summary chunks below can run without refitting.
fit_mfreq = readRDS("./bayesian_models_cor/motr_freq_cor.rds")
fit_efreq = readRDS("./bayesian_models_cor/eyetr_freq_cor.rds")
# NOTE(review): the fitting chunk above saves "motr_len_cor2.rds" but this
# reads "motr_len_cor.rds" -- confirm which file holds the intended model.
fit_mlen = readRDS("./bayesian_models_cor/motr_len_cor.rds")
fit_elen = readRDS("./bayesian_models_cor/eyetr_len_cor.rds")
fit_msurp = readRDS("./bayesian_models_cor/motr_surp_cor.rds")
fit_esurp = readRDS("./bayesian_models_cor/eyetr_surp_cor.rds")

print('---------------------------- MoTR & Log Frequency --------------------------------------------')
[1] "---------------------------- MoTR & Log Frequency --------------------------------------------"
# Full posterior summary; rho is the correlation of interest
print(fit_mfreq)
Inference for Stan model: stats_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           206.16    0.02   1.93    202.44    204.86    206.17    207.47    209.88  8522    1
mu[2]             5.80    0.00   0.02      5.77      5.79      5.80      5.81      5.84  8983    1
sigma[1]        138.91    0.02   1.50    136.02    137.87    138.92    139.91    141.86  8278    1
sigma[2]          1.34    0.00   0.01      1.32      1.33      1.34      1.35      1.37  8274    1
nu               99.82    0.24  23.02     62.37     83.46     97.35    113.85    150.17  9319    1
rho              -0.76    0.00   0.01     -0.78     -0.77     -0.76     -0.76     -0.75  8377    1
cov[1,1]      19299.02    4.59 417.70  18501.76  19006.99  19298.28  19575.75  20123.93  8268    1
cov[1,2]       -141.93    0.04   3.34   -148.59   -144.18   -141.93   -139.65   -135.44  6864    1
cov[2,1]       -141.93    0.04   3.34   -148.59   -144.18   -141.93   -139.65   -135.44  6864    1
cov[2,2]          1.80    0.00   0.04      1.73      1.78      1.80      1.83      1.87  8265    1
x_rand[1]       227.30    1.38 121.53     20.78    136.69    221.29    306.64    484.97  7799    1
x_rand[2]         5.66    0.01   1.26      3.09      4.82      5.70      6.53      8.03  8041    1
attempt           0.07    0.00   0.27      0.00      0.00      0.00      0.00      1.00  7926    1
max_attempts     10.00     NaN   0.00     10.00     10.00     10.00     10.00     10.00   NaN  NaN
lp__         -36159.81    0.03   1.73 -36164.02 -36160.74 -36159.48 -36158.53 -36157.45  3770    1

Samples were drawn using NUTS(diag_e) at Sun Jul 23 15:18:27 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- EyeTR & Log Frequency --------------------------------------------')
[1] "---------------------------- EyeTR & Log Frequency --------------------------------------------"
# Full posterior summary; rho is the correlation of interest
print(fit_efreq)
Inference for Stan model: stats_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           129.58    0.01   0.95    127.71    128.96    129.59    130.21    131.45  7967    1
mu[2]             5.79    0.00   0.02      5.75      5.78      5.79      5.80      5.82  8371    1
sigma[1]         73.11    0.01   0.75     71.66     72.60     73.10     73.61     74.61  7440    1
sigma[2]          1.34    0.00   0.01      1.31      1.33      1.34      1.35      1.36  7478    1
nu               91.76    0.24  21.94     56.83     76.18     89.08    104.50    142.84  8551    1
rho              -0.82    0.00   0.01     -0.83     -0.82     -0.82     -0.81     -0.81  8039    1
cov[1,1]       5345.65    1.28 110.19   5134.97   5270.29   5343.14   5418.78   5566.32  7432    1
cov[1,2]        -80.04    0.02   1.70    -83.43    -81.17    -80.03    -78.89    -76.73  6431    1
cov[2,1]        -80.04    0.02   1.70    -83.43    -81.17    -80.03    -78.89    -76.73  6431    1
cov[2,2]          1.79    0.00   0.04      1.72      1.77      1.79      1.81      1.86  7476    1
x_rand[1]       137.10    0.77  67.73     16.30     87.55    134.62    182.36    275.36  7640    1
x_rand[2]         5.68    0.01   1.27      3.13      4.82      5.68      6.55      8.11  7779    1
attempt           0.04    0.00   0.19      0.00      0.00      0.00      0.00      1.00  8076    1
max_attempts     10.00     NaN   0.00     10.00     10.00     10.00     10.00     10.00   NaN  NaN
lp__         -32784.74    0.03   1.72 -32789.05 -32785.64 -32784.43 -32783.48 -32782.37  4038    1

Samples were drawn using NUTS(diag_e) at Sun Jul 23 15:44:55 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- MoTR & Length --------------------------------------------')
[1] "---------------------------- MoTR & Length --------------------------------------------"
# Full posterior summary; rho is the correlation of interest
print(fit_mlen)
Inference for Stan model: stats_correlation_len_normal.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           197.87    0.03   2.04    193.81    196.48    197.88    199.27    201.86  4675    1
mu[2]             4.40    0.00   0.04      4.33      4.38      4.40      4.43      4.48  4776    1
sigma[1]        136.99    0.03   1.75    133.57    135.79    136.99    138.19    140.43  4848    1
sigma[2]          2.51    0.00   0.03      2.45      2.49      2.51      2.53      2.58  4665    1
nu               17.03    0.04   2.84     12.51     15.05     16.64     18.61     23.71  4796    1
rho               0.89    0.00   0.00      0.88      0.88      0.89      0.89      0.89  5940    1
cov[1,1]      18768.69    6.88 479.04  17841.00  18438.93  18766.08  19096.47  19721.77  4845    1
cov[1,2]        305.06    0.12   7.91    289.81    299.65    305.00    310.39    320.87  4341    1
cov[2,1]        305.06    0.12   7.91    289.81    299.65    305.00    310.39    320.87  4341    1
cov[2,2]          6.31    0.00   0.17      5.98      6.19      6.30      6.42      6.64  4664    1
x_rand[1]       219.00    1.39 122.76     20.11    127.61    208.39    297.11    488.02  7751    1
x_rand[2]         5.26    0.03   2.36      1.00      4.00      5.00      7.00     10.00  7861    1
attempt           0.10    0.00   0.33      0.00      0.00      0.00      0.00      1.00  7797    1
max_attempts     10.00     NaN   0.00     10.00     10.00     10.00     10.00     10.00   NaN  NaN
lp__         -38437.19    0.03   1.75 -38441.43 -38438.10 -38436.86 -38435.92 -38434.79  3210    1

Samples were drawn using NUTS(diag_e) at Sun Jul 23 19:35:18 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- EyeTR & Length --------------------------------------------')
[1] "---------------------------- EyeTR & Length --------------------------------------------"
# Full posterior summary; rho is the correlation of interest
print(fit_elen)
Inference for Stan model: stats_correlation_len_normal.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           125.47    0.01   1.05    123.48    124.75    125.46    126.18    127.53  5037    1
mu[2]             4.45    0.00   0.04      4.38      4.43      4.45      4.48      4.52  4809    1
sigma[1]         72.58    0.01   0.88     70.87     71.98     72.57     73.15     74.31  4816    1
sigma[2]          2.49    0.00   0.03      2.42      2.47      2.49      2.51      2.55  4786    1
nu               17.29    0.04   2.69     13.08     15.35     16.97     18.80     23.46  4894    1
rho               0.88    0.00   0.00      0.87      0.88      0.88      0.89      0.89  5800    1
cov[1,1]       5268.20    1.84 127.51   5022.34   5181.60   5266.78   5351.59   5522.02  4819    1
cov[1,2]        159.48    0.06   3.98    151.79    156.77    159.44    162.13    167.48  4232    1
cov[2,1]        159.48    0.06   3.98    151.79    156.77    159.44    162.13    167.48  4232    1
cov[2,2]          6.19    0.00   0.16      5.88      6.08      6.19      6.30      6.51  4790    1
x_rand[1]       135.19    0.75  67.87     18.34     85.18    132.06    179.13    278.49  8296    1
x_rand[2]         5.29    0.03   2.41      1.00      4.00      5.00      7.00     10.00  8133    1
attempt           0.06    0.00   0.24      0.00      0.00      0.00      0.00      1.00  7560    1
max_attempts     10.00     NaN   0.00     10.00     10.00     10.00     10.00     10.00   NaN  NaN
lp__         -35403.80    0.03   1.76 -35408.14 -35404.73 -35403.49 -35402.51 -35401.38  3435    1

Samples were drawn using NUTS(diag_e) at Sun Jul 23 19:55:18 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- MoTR & Surprisal --------------------------------------------')
[1] "---------------------------- MoTR & Surprisal --------------------------------------------"
# Full posterior summary; rho is the correlation of interest
print(fit_msurp)
Inference for Stan model: stats_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           193.29    0.02   2.22    189.00    191.80    193.27    194.83    197.56  7883    1
mu[2]             6.02    0.00   0.08      5.86      5.97      6.02      6.08      6.19  8202    1
sigma[1]        135.56    0.02   1.87    132.03    134.29    135.53    136.83    139.29  6733    1
sigma[2]          5.10    0.00   0.08      4.95      5.05      5.10      5.15      5.25  6738    1
nu               11.72    0.02   1.35      9.45     10.76     11.58     12.56     14.76  6506    1
rho               0.57    0.00   0.01      0.54      0.56      0.57      0.58      0.60  7907    1
cov[1,1]      18380.00    6.19 508.20  17430.66  18032.66  18367.66  18723.49  19401.81  6740    1
cov[1,2]        396.36    0.20  15.81    365.40    385.79    396.30    407.09    427.00  6245    1
cov[2,1]        396.36    0.20  15.81    365.40    385.79    396.30    407.09    427.00  6245    1
cov[2,2]         25.98    0.01   0.78     24.50     25.45     25.98     26.51     27.56  6750    1
x_rand[1]       229.82    1.41 125.86     23.36    137.77    220.32    308.08    504.67  7929    1
x_rand[2]         7.63    0.05   4.51      0.66      4.30      7.12     10.37     17.97  7826    1
attempt           0.21    0.01   0.51      0.00      0.00      0.00      0.00      2.00  7854    1
max_attempts     10.00     NaN   0.00     10.00     10.00     10.00     10.00     10.00   NaN  NaN
lp__         -43211.31    0.03   1.76 -43215.70 -43212.20 -43210.97 -43210.02 -43208.89  3913    1

Samples were drawn using NUTS(diag_e) at Sun Jul 23 20:47:52 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
[1] "---------------------------- EyeTR & Surprisal --------------------------------------------"
# Full posterior summary; rho is the correlation of interest
print(fit_esurp)
Inference for Stan model: stats_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           123.52    0.01   1.17    121.28    122.73    123.52    124.30    125.81  7205    1
mu[2]             6.14    0.00   0.08      5.97      6.08      6.13      6.19      6.30  7266    1
sigma[1]         72.79    0.01   0.95     70.95     72.15     72.77     73.43     74.64  6894    1
sigma[2]          5.16    0.00   0.07      5.01      5.11      5.16      5.21      5.30  5938    1
nu               14.77    0.03   1.97     11.49     13.38     14.56     15.97     19.18  5988    1
rho               0.63    0.00   0.01      0.61      0.62      0.63      0.64      0.66  8585    1
cov[1,1]       5299.28    1.66 138.13   5033.28   5205.63   5295.99   5392.41   5571.18  6894    1
cov[1,2]        237.27    0.11   8.26    221.19    231.62    237.24    242.84    253.80  6152    1
cov[2,1]        237.27    0.11   8.26    221.19    231.62    237.24    242.84    253.80  6152    1
cov[2,2]         26.59    0.01   0.77     25.10     26.07     26.58     27.11     28.14  5952    1
x_rand[1]       141.12    0.80  69.27     22.01     91.25    136.11    184.96    290.51  7508    1
x_rand[2]         7.68    0.05   4.55      0.67      4.31      7.15     10.45     17.96  7265    1
attempt           0.17    0.00   0.45      0.00      0.00      0.00      0.00      1.00  8132    1
max_attempts     10.00     NaN   0.00     10.00     10.00     10.00     10.00     10.00   NaN  NaN
lp__         -40032.38    0.03   1.76 -40036.66 -40033.33 -40032.04 -40031.08 -40029.99  3594    1

Samples were drawn using NUTS(diag_e) at Sun Jul 23 21:06:56 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
# MCMC convergence diagnostics for the word-statistic correlation fits:
# trace plots (chain mixing), per-chain densities, and interval plots.

# MoTR & Log Freq
stan_trace(fit_mfreq)
stan_dens(fit_mfreq, separate_chains = TRUE)
stan_plot(fit_mfreq)

# EyeTR & Log Freq
stan_trace(fit_efreq)
stan_dens(fit_efreq, separate_chains = TRUE)
stan_plot(fit_efreq)

# MoTR & Len
stan_trace(fit_mlen)
stan_dens(fit_mlen, separate_chains = TRUE)
stan_plot(fit_mlen)

# EyeTR & Len
stan_trace(fit_elen)
stan_dens(fit_elen, separate_chains = TRUE)
stan_plot(fit_elen)

# MoTR & Surprisal
stan_trace(fit_msurp)
stan_dens(fit_msurp, separate_chains = TRUE)
stan_plot(fit_msurp)

# EyeTR & Surprisal
stan_trace(fit_esurp)
stan_dens(fit_esurp, separate_chains = TRUE)
stan_plot(fit_esurp)
print('---------------------------- MoTR & Log Freq --------------------------------------------')
[1] "---------------------------- MoTR & Log Freq --------------------------------------------"
# Posterior summary of rho for each (measure, word statistic) pair:
# posterior mean, 95% HPD interval, and equal-tailed 95% credible interval.
# rstan::extract is qualified because tidyr also exports extract().
rho_mfreq <- as.numeric(rstan::extract(fit_mfreq, "rho")[[1]])
rho_mean <- mean(rho_mfreq)                            # was `mean`, which shadowed base::mean
crI <- quantile(rho_mfreq, c(.025, .975))
hpd95 <- HPDinterval(as.mcmc(rho_mfreq), prob = 0.95)  # was misleadingly named hpd99
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: -0.7613
HPD: [-0.776, -0.7461]
crI: [-0.7762, -0.7462]
print('---------------------------- EyeTR & Log Freq --------------------------------------------')
[1] "---------------------------- EyeTR & Log Freq --------------------------------------------"
rho_efreq <- as.numeric(rstan::extract(fit_efreq, "rho")[[1]])
rho_mean <- mean(rho_efreq)
crI <- quantile(rho_efreq, c(.025, .975))
hpd95 <- HPDinterval(as.mcmc(rho_efreq), prob = 0.95)
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: -0.8182
HPD: [-0.8297, -0.8065]
crI: [-0.8295, -0.8062]
print('---------------------------- MoTR & Length --------------------------------------------')
[1] "---------------------------- MoTR & Length --------------------------------------------"
rho_mlen <- as.numeric(rstan::extract(fit_mlen, "rho")[[1]])
rho_mean <- mean(rho_mlen)
crI <- quantile(rho_mlen, c(.025, .975))
hpd95 <- HPDinterval(as.mcmc(rho_mlen), prob = 0.95)
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.8867
HPD: [0.8784, 0.8943]
crI: [0.8784, 0.8943]
print('---------------------------- EyeTR & Length --------------------------------------------')
[1] "---------------------------- EyeTR & Length --------------------------------------------"
rho_elen <- as.numeric(rstan::extract(fit_elen, "rho")[[1]])
rho_mean <- mean(rho_elen)
crI <- quantile(rho_elen, c(.025, .975))
hpd95 <- HPDinterval(as.mcmc(rho_elen), prob = 0.95)
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.883
HPD: [0.8743, 0.8908]
crI: [0.8746, 0.8911]
print('---------------------------- MoTR & Surprisal --------------------------------------------')
[1] "---------------------------- MoTR & Surprisal --------------------------------------------"
rho_msurp <- as.numeric(rstan::extract(fit_msurp, "rho")[[1]])
rho_mean <- mean(rho_msurp)
crI <- quantile(rho_msurp, c(.025, .975))
hpd95 <- HPDinterval(as.mcmc(rho_msurp), prob = 0.95)
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.5735
HPD: [0.5447, 0.6033]
crI: [0.5432, 0.602]
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
[1] "---------------------------- EyeTR & Surprisal --------------------------------------------"
rho_esurp <- as.numeric(rstan::extract(fit_esurp, "rho")[[1]])
rho_mean <- mean(rho_esurp)
crI <- quantile(rho_esurp, c(.025, .975))
hpd95 <- HPDinterval(as.mcmc(rho_esurp), prob = 0.95)
# Note: no trailing newline here, matching the original output
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]", sep = "")
Mean: 0.632
HPD: [0.6075, 0.6574]
crI: [0.6066, 0.6569]
print('---------------------------- MoTR & Log Frequency--------------------------------------------')
[1] "---------------------------- MoTR & Log Frequency--------------------------------------------"
# Posterior-predictive draws vs. observed (MoTR RT, log frequency) pairs
mfreq_rand <- rstan::extract(fit_mfreq, "x_rand")[[1]]

# Blank canvas with fixed limits; type = "n" suppresses drawing
plot(1, 1, xlim = c(0, 800), ylim = c(0, 12), type = "n",
     xlab = "MoTR value", ylab = "Log Frequency", main = "Gaze Duration")

# Simulated draws in black, observed data (mfreq_temp) in red
points(mfreq_rand[, 1], mfreq_rand[, 2], col = "black", pch = 16)
points(mfreq_temp, pch = 16, col = "red")

# 50/75% and 95/99% concentration ellipses over the simulated draws
dataEllipse(mfreq_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(mfreq_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")


print('---------------------------- EyeTR & Log Frequency--------------------------------------------')
[1] "---------------------------- EyeTR & Log Frequency--------------------------------------------"
# Posterior-predictive draws vs. observed (EyeTR RT, log frequency) pairs
efreq_rand <- rstan::extract(fit_efreq, "x_rand")[[1]]

# Blank canvas with fixed limits; type = "n" suppresses drawing
plot(1, 1, xlim = c(0, 500), ylim = c(0, 12), type = "n",
     xlab = "Eye tracking value", ylab = "Log Frequency", main = "Gaze Duration")

# Simulated draws in black, observed data (efreq_temp) in red
points(efreq_rand[, 1], efreq_rand[, 2], col = "black", pch = 16)
points(efreq_temp, pch = 16, col = "red")

# 50/75% and 95/99% concentration ellipses over the simulated draws
dataEllipse(efreq_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(efreq_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")


print('---------------------------- MoTR & Length --------------------------------------------')
[1] "---------------------------- MoTR & Length --------------------------------------------"
# Posterior-predictive draws vs. observed (MoTR RT, word length) pairs
mlen_rand <- rstan::extract(fit_mlen, "x_rand")[[1]]

# Blank canvas with fixed limits; type = "n" suppresses drawing
plot(1, 1, xlim = c(0, 800), ylim = c(0, 20), type = "n",
     xlab = "MoTR value", ylab = "Word Length", main = "Gaze Duration")

# Simulated draws in black, observed data (mlen_temp) in red
points(mlen_rand[, 1], mlen_rand[, 2], col = "black", pch = 16)
points(mlen_temp, pch = 16, col = "red")

# 50/75% and 95/99% concentration ellipses over the simulated draws
dataEllipse(mlen_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(mlen_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")


print('---------------------------- EyeTR & Length --------------------------------------------')
[1] "---------------------------- EyeTR & Length --------------------------------------------"
# Posterior-predictive draws vs. observed (EyeTR RT, word length) pairs
elen_rand <- rstan::extract(fit_elen, "x_rand")[[1]]

# Blank canvas with fixed limits; type = "n" suppresses drawing
plot(1, 1, xlim = c(0, 800), ylim = c(0, 20), type = "n",
     xlab = "EyeTR value", ylab = "Word Length", main = "Gaze Duration")

# Simulated draws in black, observed data (elen_temp) in red
points(elen_rand[, 1], elen_rand[, 2], col = "black", pch = 16)
points(elen_temp, pch = 16, col = "red")

# 50/75% and 95/99% concentration ellipses over the simulated draws
dataEllipse(elen_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(elen_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")


print('---------------------------- MoTR & Surprisal --------------------------------------------')
[1] "---------------------------- MoTR & Surprisal --------------------------------------------"
# Posterior-predictive draws vs. observed (MoTR RT, surprisal) pairs
msurp_rand <- rstan::extract(fit_msurp, "x_rand")[[1]]

# Blank canvas with fixed limits; type = "n" suppresses drawing
plot(1, 1, xlim = c(0, 800), ylim = c(0, 20), type = "n",
     xlab = "MoTR value", ylab = "Word Surprisal", main = "Gaze Duration")

# Simulated draws in black, observed data (msurp_temp) in red
points(msurp_rand[, 1], msurp_rand[, 2], col = "black", pch = 16)
points(msurp_temp, pch = 16, col = "red")

# 50/75% and 95/99% concentration ellipses over the simulated draws
dataEllipse(msurp_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(msurp_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")


print('---------------------------- EyeTR & Surprisal --------------------------------------------')
[1] "---------------------------- EyeTR & Surprisal --------------------------------------------"
# Posterior-predictive draws vs. observed (EyeTR RT, surprisal) pairs
esurp_rand <- rstan::extract(fit_esurp, "x_rand")[[1]]

# Blank canvas with fixed limits; type = "n" suppresses drawing
plot(1, 1, xlim = c(0, 800), ylim = c(0, 20), type = "n",
     xlab = "EyeTR value", ylab = "Word Surprisal", main = "Gaze Duration")

# Simulated draws in black, observed data (esurp_temp) in red
points(esurp_rand[, 1], esurp_rand[, 2], col = "black", pch = 16)
points(esurp_temp, pch = 16, col = "red")

# 50/75% and 95/99% concentration ellipses over the simulated draws
dataEllipse(esurp_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(esurp_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")

# Fixed typo in the status message ("Fisrt" -> "First").
print("EyeTR vs. EyeTR First Pass Regression Prob. < 0.3 ")

# Shared base table for the split-half FPReg comparison; previously this
# identical pipeline was duplicated three times.
ereg_base = provo_eyetr_grouped_df %>%
  filter(metric == "FPReg") %>%
  distinct() %>%
  # Drop four rows by position.
  # NOTE(review): hard-coded row numbers are fragile if upstream filtering
  # changes — confirm what these rows are.
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
  spread(measure, value) %>%
  # Smooth exact zeros (FPReg is a probability) so downstream models are defined.
  mutate(eyetr_value_1 = pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5))

# All data, and the low/high splits on the first half's regression probability.
ereg_df = ereg_base
ereg_df_low = ereg_base %>% filter(eyetr_value_1 < 0.3)
# View(ereg_df_low)
ereg_df_high = ereg_base %>% filter(eyetr_value_1 >= 0.3)
# View(ereg_df_high)

# Run each correlation test once and reuse the result, instead of calling
# cor.test() twice per pair (once for $estimate, once for $p.value).
ct_ereg_all  = cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)
ct_ereg_low  = cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)
ct_ereg_high = cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)
# Same print order as before: estimate then p-value for all / low / high.
print(ct_ereg_all$estimate)
print(ct_ereg_all$p.value)
print(ct_ereg_low$estimate)
print(ct_ereg_low$p.value)
print(ct_ereg_high$estimate)
print(ct_ereg_high$p.value)

# View(egd_df)

# Density of the two split-half FPReg measures (columns 5:6 are the value
# columns after spread); facetted per measure with free axes.
ereg_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Convert a split-half data frame to a plain 2-column numeric matrix,
# dropping rows with missing values.
as_pair_matrix <- function(d) {
  d[c("eyetr_value_1", "eyetr_value_2")] %>%
    drop_na() %>%
    data.matrix()
}

ereg_temp      <- as_pair_matrix(ereg_df)
ereg_temp_low  <- as_pair_matrix(ereg_df_low)
ereg_temp_high <- as_pair_matrix(ereg_df_high)

# Three scatterplots side by side: full data, low-FPReg, and high-FPReg splits.
par(mfrow = c(1, 3))
panels <- list(
  list(data = ereg_temp,      title = "FPReg all data Not Log-Transformed"),
  list(data = ereg_temp_low,  title = "FPReg < 0.3 Not Log-Transformed"),
  list(data = ereg_temp_high, title = "FPReg > 0.3 Not Log-Transformed")
)
for (p in panels) {
  plot(p$data, pch = 16, col = "blue", main = p$title)
}
# -------fit model eyetr vs. eyetr FPReg <0.3 & >=0.3 ----------
# Fit a bivariate normal correlation model to the split-half FPReg matrix.
reg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
# Strip the compiled DSO so the saved object is portable across machines.
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models_cor/eyetr_eyetr_FPReg_cor_all_data.rds"))
# fit_ereg_all = readRDS("./eyetr_eyetr_FPReg_cor_all_data.rds")
# NOTE(review): the fit above is saved as *_all_data.rds, but *_cor.rds is
# read back here — confirm the intended file is being loaded.
fit_ereg_all = readRDS("./bayesian_models_cor/eyetr_eyetr_FPReg_cor.rds")
fit_ereg_low = readRDS("./bayesian_models_cor/eyetr_eyetr_FPReg_cor_00-03.rds")
fit_ereg_high = readRDS("./bayesian_models_cor/eyetr_eyetr_FPReg_cor_03-1.rds")

# Print fit summaries and MCMC diagnostics for the three FPReg models.
# Kept as top-level expressions: stan_trace/stan_dens/stan_plot return ggplot
# objects that only render because they auto-print at top level.
print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
print(fit_ereg_all)
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
print(fit_ereg_low)
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
print(fit_ereg_high)
# # FPReg all data
stan_trace(fit_ereg_all)
stan_dens(fit_ereg_all, separate_chains = TRUE)
stan_plot(fit_ereg_all)

# # FPReg < 0.3
stan_trace(fit_ereg_low)
stan_dens(fit_ereg_low, separate_chains = TRUE)
stan_plot(fit_ereg_low)

# FPReg >= 0.3
stan_trace(fit_ereg_high)
stan_dens(fit_ereg_high, separate_chains = TRUE)
stan_plot(fit_ereg_high)
# Posterior summaries of the correlation parameter rho for each FPReg model.
# Fixes: `mean` no longer shadows base::mean, and the HPD variable is named
# `hpd95` (it computes a 95% interval; it was misleadingly called hpd99).
# The cat() output is byte-identical to before.
print('---------------------------- First Pass Regression all data--------------------------------------------')
rho_ereg_all = as.numeric(extract(fit_ereg_all, "rho")[[1]])
rho_mean = mean(rho_ereg_all)
crI = quantile(rho_ereg_all, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_ereg_all), prob=0.95)
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")


print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
rho_ereg_low = as.numeric(extract(fit_ereg_low, "rho")[[1]])
rho_mean = mean(rho_ereg_low)
crI = quantile(rho_ereg_low, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_ereg_low), prob=0.95)
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")


print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
rho_ereg_high = as.numeric(extract(fit_ereg_high, "rho")[[1]])
rho_mean = mean(rho_ereg_high)
crI = quantile(rho_ereg_high, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_ereg_high), prob=0.95)
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")
print('---------------------------- First Pass Regression all data --------------------------------------------')
# Posterior predictive draws vs. observed split-half FPReg values (all data).
draws_reg_all <- extract(fit_ereg_all, "x_rand")[[1]]

# Blank canvas, then draws in black, observed matrix in red.
plot(1, 1, xlim = c(0, 1), ylim = c(0, 1), type = "n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")
points(draws_reg_all, col = "black", pch = 16)
points(ereg_temp, pch = 16, col = "red")

# Credible-region ellipses.
dataEllipse(draws_reg_all, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(draws_reg_all, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")

print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
# Posterior predictive draws vs. observed values for the low-FPReg split.
draws_reg_low <- extract(fit_ereg_low, "x_rand")[[1]]

plot(1, 1, xlim = c(0, 1), ylim = c(0, 1), type = "n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")
points(draws_reg_low, col = "black", pch = 16)
points(ereg_temp_low, pch = 16, col = "red")

dataEllipse(draws_reg_low, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(draws_reg_low, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")

print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
# Thin the posterior predictive draws to 900 points so the plot stays legible.
draws_reg_high_full <- extract(fit_ereg_high, "x_rand")[[1]]
keep_rows <- sample(seq_len(nrow(draws_reg_high_full)), 900)
draws_reg_high <- draws_reg_high_full[keep_rows, ]

plot(1, 1, xlim = c(0, 1), ylim = c(0, 1), type = "n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")
points(draws_reg_high, col = "black", pch = 16)
points(ereg_temp_high, pch = 16, col = "red")

dataEllipse(draws_reg_high, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(draws_reg_high, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")
---
title: "Exploratory Analysis for MoTR Reading Data"
output: html_notebook
---

```{r}
shhh <- suppressPackageStartupMessages # It's a library, so shhh!

# Modeling / wrangling / plotting packages.
# Fixed: mgcv was loaded twice; the duplicate call has been removed.
shhh(library(mgcv))
shhh(library(dplyr))
shhh(library(ggplot2))
shhh(library(lme4))
shhh(library(tidymv))
shhh(library(gamlss))
shhh(library(gsubfn))
shhh(library(lmerTest))
shhh(library(tidyverse))
shhh(library(boot))
shhh(library(rsample))
shhh(library(plotrix))
shhh(library(ggrepel))

# Bayesian modeling stack.
shhh(library(brms))
shhh(library(bayesplot))
shhh(library(patchwork))
shhh(library(MASS))
shhh(library(tidyr))
shhh(library(extraDistr))
shhh(library(purrr))
# For exercises with Stan code
shhh(library(rstan))
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = FALSE)

# Wrapped in shhh() for consistency with every other library call above.
shhh(library(car))
shhh(library(coda))
shhh(library(gridExtra))

# Global plotting / printing / RNG settings.
theme_set(theme_bw())
options(digits=4)
options(scipen=999)
set.seed(444)
# Pass-through pipe step: emits a status message and returns its input unchanged.
pipe_message = function(.data, status) {message(status); .data}

```


# Read in MoTR Data

```{r}

rate = 160

file_prefix = "../reading_measures/provo/provo_f160/"
fnames = list.files(path=file_prefix)

# Read every per-reader file, tag it with the subject id taken from the file
# name, and stack all files at once. This replaces the previous rbind() inside
# a loop, which copies the accumulated data frame on every iteration.
df = fnames %>%
  lapply(function(f) {
    read.csv(paste0(file_prefix, "/", f)) %>%
      mutate(subj = str_remove(f, "_reading_measures.csv"))
  }) %>%
  bind_rows()

# Filter out readers who don't answer the comprehension questions correctly
# Per (paragraph, subject): collapse correctness to a 0/1 flag, then compute
# each subject's overall accuracy rounded to two decimals.
# NOTE(review): unique(correctness) assumes one correctness value per
# (para_nr, subj) pair; if a pair ever has mixed values this errors — confirm.
filter_df = df %>%
  group_by(para_nr, subj) %>% summarise(correct = if_else(unique(correctness) == 1, 1, 0)) %>% ungroup() %>%
  drop_na() %>%
  group_by(subj) %>% summarise(p_correct = mean(correct)) %>% ungroup() %>%
  mutate(p_correct = round(p_correct, digits = 2))

# Exclude subjects below 80% comprehension accuracy.
filter_df = filter_df %>% filter(p_correct < 0.8)
filter_list = filter_df$subj
## reader_3:0.70, reader_60:0.79, reader_76:0.72 , reader_256:0.71 , reader_262:0.57 

# Clean MoTR data: drop excluded subjects, normalize words and subject ids,
# and keep only the reading-measure columns used downstream.
raw_df = df %>%
  filter(! subj %in% c(filter_list)) %>%
  mutate(word = str_trim(word)) %>%
  mutate(subj = str_remove(subj, "reader_")) %>%
  mutate(subj = as.character(subj)) %>%
  # filter(! subj %in% c("3", "60", "76", "256", "262")) %>% # Explanation for this filtering
  mutate(FPReg = if_else(total_duration == 0, -1, FPReg)) %>% #If the word is skipped we can't say that it wasn't regressed on the first pass. Set to a "NA"
  dplyr::select(expr_id, cond_id, para_nr, word, word_nr, first_duration, total_duration, gaze_duration, go_past_time, FPReg, subj)

# Number of subjects remaining after exclusions.
length(unique(raw_df$subj))

# Mean first-pass regression rate (excluding the -1 skip sentinel).
df %>%
  filter(! subj %in% c(filter_list)) %>%
  filter(FPReg >= 0) %>%
  dplyr::select(FPReg) %>%
  drop_na() %>%
  summarise( m = mean(FPReg))

# Mean first-pass fixation rate.
df %>%
  filter(! subj %in% c(filter_list)) %>%
  dplyr::select(FPFix) %>%
  drop_na() %>%
  summarise( m = mean(FPFix))


```


```{r}
# Average across subjects
# Long format over the five reading measures (columns 6:10), drop FPReg's -1
# "NA" sentinel, remove per-word outliers, then average across subjects.
# Fixed: `if_else(cond, T, F)` replaced by the logical expression itself, and
# T/F literals by TRUE/FALSE (T/F are reassignable and non-idiomatic).
motr_agg_df = raw_df %>%
  gather(metric, value, 6:10) %>%
    filter(value >= 0) %>% #Removes the "NA" values for FPReg
    drop_na() %>%
    group_by(para_nr, word_nr, word, metric) %>%
      # Outlier = duration metric more than 3 SD above the per-word mean.
      # NOTE(review): sd() is NA for single-observation groups, so those rows
      # are dropped by the filter below (NA == FALSE is NA) — same behavior as
      # before; confirm this is intended.
      mutate(outlier = metric != "FPReg" & value > (mean(value) + 3 * sd(value))) %>%
      filter(outlier == FALSE) %>%
      summarise(value = mean(value), nsubj = length(unique(subj))) %>%
  ungroup() %>%
  arrange(para_nr, word_nr) %>%
  rename(text_id = para_nr, word_text_idx = word_nr, motr_value = value)

# View(motr_agg_df)
# write.csv(motr_agg_df, file = "/Users/cui/Desktop/MoTR/pipeline/ancillary_data/motr_agg_df.csv", row.names = FALSE)

```




# Comparison to Provo


```{r}
# Read in Provo surprisal, frequency and length data
# Word-level predictors keyed by (text_id, sent_id, word_idx); trigger_idx is
# renamed to word_idx to match the eye-tracking tables merged later.
provo_modeling_df = read.csv("../ancillary_data/provo_df.csv") %>%
  dplyr::select(text_id, sent_id, trigger_idx, word, freq, surp, len) %>%
  rename(word_idx = trigger_idx)

provo_modeling_df
# View(provo_modeling_df)

```

```{r}
# Read in Provo eyetracking data

# Raw per-fixation interest-area measures; cleaned and reshaped in the next chunk.
provo_raw_df = read.csv("../ancillary_data/provo_eyetracking.csv")

```

```{r}

# unique(provo_raw_df$Participant_ID)
# length(unique(provo_raw_df$Participant_ID))

# Clean the Provo eye-tracking measures:
#  1. select/rename IA_* columns to match the MoTR naming scheme;
#  2. zero out first-pass measures when the first fixation was not progressive;
#  3. for skipped words (total_duration == 0) zero durations and set FPReg to
#     the -1 "NA" sentinel (mirrors the MoTR preprocessing above);
#  4. reshape to long, drop the sentinel rows and NAs, normalize word/subj;
#  5. remove per-word duration outliers (> mean + 3 SD within word).
provo_eyetracking_df = provo_raw_df %>%
  dplyr::select(Participant_ID, Text_ID, Sentence_Number, Word_In_Sentence_Number, Word, Word_Number, IA_FIRST_FIX_PROGRESSIVE, IA_FIRST_RUN_DWELL_TIME, IA_DWELL_TIME, IA_REGRESSION_PATH_DURATION, IA_REGRESSION_OUT, IA_SKIP) %>%
  rename( #first_duration = IA_FIRST_FIXATION_DURATION,   
          gaze_duration = IA_FIRST_RUN_DWELL_TIME,
          total_duration = IA_DWELL_TIME,
          go_past_time = IA_REGRESSION_PATH_DURATION,
          subj = Participant_ID,
          text_id = Text_ID,
          sent_id = Sentence_Number,
          word_idx = Word_In_Sentence_Number,
          word_text_idx = Word_Number,   # IA_ID?
          word = Word,      # Word?
          FPReg = IA_REGRESSION_OUT,
          skip = IA_SKIP,
          ff_progressive = IA_FIRST_FIX_PROGRESSIVE) %>%
  # NOTE(review): first_duration is set to the first-run dwell time here —
  # the true IA_FIRST_FIXATION_DURATION column is commented out above; confirm.
  mutate(first_duration = gaze_duration) %>%
  mutate(gaze_duration = if_else(ff_progressive == 0, 0, as.double(gaze_duration)),
         go_past_time = if_else(ff_progressive == 0, 0, as.double(go_past_time))) %>%
  dplyr::select(-ff_progressive) %>%
  
  mutate(
    gaze_duration = if_else(total_duration == 0, 0, as.double(gaze_duration)),
      go_past_time = if_else(total_duration == 0, 0, as.double(go_past_time)),
      FPReg = if_else(total_duration == 0, -1, as.double(FPReg)),
      first_duration =  if_else(total_duration == 0, 0, as.double(first_duration)),
  ) %>%
  
  # drop_na() %>%     # will drop the whole row with all the metrics
  gather(metric, value, 7:12) %>%
  filter(value >= 0) %>%          # filter skipped word in eye tracking data for FPReg
  # ==== Remove skipped words
  # mutate(zero = if_else(metric != "FPReg" & value == 0,T, F)) %>%
  # filter(zero == F) %>%
  
  # mutate(value = if_else(is.na(value), as.integer(0), as.integer(value))) %>%
  # mutate(value = if_else(metric != "FPReg" & is.na(value), as.integer(0), as.integer(value))) %>%
  drop_na() %>%
  mutate(word = str_trim(word)) %>%
  mutate(subj = str_remove(subj, "Sub")) %>%
  mutate(subj = as.integer(subj)) %>%
    group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    mutate(outlier = if_else(metric != "FPReg" & metric != "skip" & value > (mean(value) + 3 * sd(value) ), T, F)) %>%
    filter(outlier == F) %>%
  ungroup() #%>%

# Aggregate cross-participant data for all subjects
# Mean value per word and metric, plus the number of contributing subjects.
provo_eyetracking_agg_df = provo_eyetracking_df %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
  summarise(value = mean(value),
            nsubj = n_distinct(subj)) %>%
  ungroup()

# Overall first-pass regression rate in the raw eye-tracking data.
provo_raw_df %>%
  dplyr::select(IA_REGRESSION_OUT) %>%
  drop_na() %>%
  summarise(m = mean(IA_REGRESSION_OUT))

# Overall skipping rate in the raw eye-tracking data.
provo_raw_df %>%
  dplyr::select(IA_SKIP) %>%
  drop_na() %>%
  summarise(m = mean(IA_SKIP))


```

```{r}

# Split the eyetracking data in two by subjects to see how well it correlates with itself
# First half: subjects with id <= 42.
# NOTE(review): the 42 cutoff is hard-coded here and below — presumably it
# splits the participant pool roughly in half; confirm.
provo_eyetracking_subj1_df_temp = provo_eyetracking_df %>%
  filter(subj <= 42) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    summarise(value = mean(value)) %>%
  ungroup() %>%
  rename(value_1 = value) #%>%
  # dplyr::select(-sent_id, -word_idx)

# View(provo_eyetracking_subj1_df_temp)

# Merge with the MoTR aggregate to restrict to words present in both datasets;
# two text spans are excluded (alignment problems per the filters below).
provo_eyetracking_subj1_df = merge(provo_eyetracking_subj1_df_temp, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
  arrange(text_id, sent_id, word_idx) %>%
  filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
  filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
  rename(word = word.y) %>%
  dplyr::select(text_id, word_text_idx, metric, word, value_1)

# View(provo_eyetracking_subj1_df)

# Second half: subjects with id > 42.
provo_eyetracking_subj2_df = provo_eyetracking_df %>%
  filter(subj > 42) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    summarise(value = mean(value)) %>%
  ungroup() %>%
    rename(value_2 = value)%>%
  dplyr::select(-sent_id, -word_idx)

# View(provo_eyetracking_subj2_df)
  
# Join the halves and remove per-metric outliers (> mean + 3 SD) in either
# half, then stack value_1/value_2 into long format.
provo_eyetr_grouped_df = merge(provo_eyetracking_subj2_df, provo_eyetracking_subj1_df, by=c("text_id", "word_text_idx", "metric")) %>%
  # filter(word.x == word.y) %>%
  dplyr::select(-word.y) %>%
  group_by(metric) %>%
    mutate(motr_outlier = if_else(metric != "FPReg" & metric != "skip" & value_1 > (mean(value_1) + 3 * sd(value_1) ), T, F)) %>%
    filter(motr_outlier == F) %>%
    mutate(eyetr_outlier = if_else(metric != "FPReg" & metric != "skip" & value_2 > (mean(value_2) + 3 * sd(value_2) ), T, F)) %>%
    filter(eyetr_outlier == F) %>%
  ungroup() %>%
  gather(measure, value, c("value_1", "value_2")) %>%
  dplyr::select(-motr_outlier, -eyetr_outlier)

# View(provo_eyetr_grouped_df)
```


```{r}
# Attach word-level predictors (freq/surp/len) to the aggregated eye-tracking
# data; word_text_idx is shifted to 0-based to match the MoTR indexing.
provo_df = merge(provo_eyetracking_agg_df, provo_modeling_df, by=c("text_id", "sent_id", "word_idx")) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  arrange(text_id, sent_id, word_idx) %>%
  rename(eyetr_value = value) 

# Merge in the MoTR aggregate, drop known misaligned spans, remove per-metric
# outliers in either modality, and stack the two modalities into long format.
provo_df = merge(provo_df, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
arrange(text_id, sent_id, word_idx) %>%
  # almost all the word.x != word.y is because of normalization problem, so we can keep them, instead, deleting some special cases
filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
  filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
# filter(word.x == word) #%>%
dplyr::select(-word.x, -word.y) %>%
group_by(metric) %>%
  mutate(motr_outlier = if_else(metric != "FPReg" & motr_value > (mean(motr_value) + 3 * sd(motr_value) ), T, F)) %>%
  filter(motr_outlier == F) %>%
  mutate(eyetr_outlier = if_else(metric != "FPReg" & eyetr_value > (mean(eyetr_value) + 3 * sd(eyetr_value) ), T, F)) %>%
  filter(eyetr_outlier == F) %>%
ungroup() %>%
gather(measure, value, c("eyetr_value", "motr_value")) %>%
dplyr::select(-motr_outlier, -eyetr_outlier)
  
# View(provo_df)
# provo_df
```


# Bayesian -- use Stan -- motr & eyetr correlation
```{r}
print("Gaze Duration")
# Gaze-duration table in wide form, with values floored at 1 ms so the log
# transform below is defined even for zeros.
gd_df = provo_df %>%
  filter(metric == "gaze_duration") %>%
  spread(measure, value) %>%
  mutate(eyetr_value = pmax(eyetr_value, 1),
         motr_value = pmax(motr_value, 1)) %>%
  mutate(eyetr_value_log = log(eyetr_value),
         motr_value_log = log(motr_value))

# Pearson correlations on the raw and log scales.
ct_gd_raw = cor.test(gd_df$eyetr_value, gd_df$motr_value)
ct_gd_log = cor.test(gd_df$eyetr_value_log, gd_df$motr_value_log)
print(ct_gd_raw$estimate)
print(ct_gd_log$estimate)
# View(gd_df)
```


```{r}
# Density of the raw and log-transformed values (columns 12:15), one facet
# per measure with free axes.
gd_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")
  
```



```{r}
# Raw paired values as a plain matrix (left uncentered).
gd_temp <- data.matrix(gd_df[c("eyetr_value", "motr_value")])

# Log values, centered around 0 per column.
gd_temp_log <- data.matrix(
  mutate(gd_df[c("eyetr_value_log", "motr_value_log")],
         eyetr_value_log = eyetr_value_log - mean(eyetr_value_log),
         motr_value_log = motr_value_log - mean(motr_value_log))
)

# Side-by-side scatterplots: raw (blue) vs. centered log (red).
par(mfrow = c(1, 2))
plot(gd_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
plot(gd_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")
```



```{r, eval=FALSE}
# Fit the bivariate correlation model to the gaze-duration pairs.
gd_data = list(x=gd_temp, N=nrow(gd_temp))

fit_gd = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=gd_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
# Strip the compiled DSO so the saved object is portable across machines.
fit_gd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gd, file = paste0("./bayesian_models_cor/motr_eyetr_gaze_duration_cor_drop0s.rds"))

```

```{r}
print("Go Past Time")
# Go-past-time table in wide form, floored at 1 ms so log() is defined.
gpt_df = provo_df %>%
  filter(metric == "go_past_time") %>%
  spread(measure, value) %>%
  mutate(eyetr_value = pmax(eyetr_value, 1),
         motr_value = pmax(motr_value, 1)) %>%
  mutate(eyetr_value_log = log(eyetr_value),
         motr_value_log = log(motr_value))

# Pearson correlations on the raw and log scales.
ct_gpt_raw = cor.test(gpt_df$eyetr_value, gpt_df$motr_value)
ct_gpt_log = cor.test(gpt_df$eyetr_value_log, gpt_df$motr_value_log)
print(ct_gpt_raw$estimate)
print(ct_gpt_log$estimate)

# Density of the raw and log values (columns 12:15).
gpt_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Raw pairs as a matrix; log pairs centered per column.
gpt_temp <- data.matrix(gpt_df[c("eyetr_value", "motr_value")])

gpt_temp_log <- data.matrix(
  mutate(gpt_df[c("eyetr_value_log", "motr_value_log")],
         eyetr_value_log = eyetr_value_log - mean(eyetr_value_log),
         motr_value_log = motr_value_log - mean(motr_value_log))
)

# Side-by-side scatterplots: raw (blue) vs. centered log (red).
par(mfrow = c(1, 2))
plot(gpt_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
plot(gpt_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model go past time ----------
# Fit the bivariate correlation model to the go-past-time pairs.
gpt_data = list(x=gpt_temp, N=nrow(gpt_temp))
fit_gpt = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=gpt_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
# Strip the compiled DSO so the saved object is portable across machines.
fit_gpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gpt, file = paste0("./bayesian_models_cor/motr_eyetr_go_past_time_cor_drop0s.rds"))
```


```{r}
print("Total Duration")
# Total-duration table in wide form, floored at 1 ms so log() is defined.
td_df = provo_df %>%
  filter(metric == "total_duration") %>%
  spread(measure, value) %>%
  mutate(eyetr_value = pmax(eyetr_value, 1),
         motr_value = pmax(motr_value, 1)) %>%
  mutate(eyetr_value_log = log(eyetr_value),
         motr_value_log = log(motr_value))

# Pearson correlations on the raw and log scales.
ct_td_raw = cor.test(td_df$eyetr_value, td_df$motr_value)
ct_td_log = cor.test(td_df$eyetr_value_log, td_df$motr_value_log)
print(ct_td_raw$estimate)
print(ct_td_log$estimate)

# Density of the raw and log values (columns 12:15).
td_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Raw pairs as a matrix; log pairs centered per column.
td_temp <- data.matrix(td_df[c("eyetr_value", "motr_value")])

td_temp_log <- data.matrix(
  mutate(td_df[c("eyetr_value_log", "motr_value_log")],
         eyetr_value_log = eyetr_value_log - mean(eyetr_value_log),
         motr_value_log = motr_value_log - mean(motr_value_log))
)

# Side-by-side scatterplots: raw (blue) vs. centered log (red).
par(mfrow = c(1, 2))
plot(td_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
plot(td_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model total duration ----------
# Fit the bivariate correlation model to the total-duration pairs.
td_data = list(x=td_temp, N=nrow(td_temp))
fit_td = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=td_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
# Strip the compiled DSO so the saved object is portable across machines.
fit_td@stanmodel@dso <- new("cxxdso")
saveRDS(fit_td, file = paste0("./bayesian_models_cor/motr_eyetr_total_duration_cor.rds"))
```

```{r}
print("First Pass Regression Prob.")
# FPReg table in wide form; exact zeros are smoothed up to 1e-5 because
# FPReg is a probability and downstream models need strictly positive values.
reg_df = provo_df %>%
  filter(metric == "FPReg") %>%
  spread(measure, value) %>%
  mutate(eyetr_value = pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5))

ct_reg_raw = cor.test(reg_df$eyetr_value, reg_df$motr_value)
print(ct_reg_raw$estimate)

# View(reg_df)
# Density of the two smoothed measures (columns 12:13).
reg_df %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp <- data.matrix(reg_df[c("eyetr_value", "motr_value")])

# NOTE: mfrow is set for two panels but only one scatterplot follows.
par(mfrow = c(1, 2))
plot(reg_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model FPReg ----------
# Fit the bivariate beta correlation model to the FPReg probability pairs.
reg_data = list(x=reg_temp, N=nrow(reg_temp))
fit_reg = stan(
  file="stan_models/bivariate_beta_correlation_reg.stan", 
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
# Strip the compiled DSO so the saved object is portable across machines.
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models_cor/motr_eyetr_FPReg_cor_drop0s.rds"))
```



```{r}
# models with all 0s
# NOTE(review): this overwrites any fit_gd/fit_gpt/fit_td/fit_reg objects
# created by the (eval=FALSE) fitting chunks above — confirm intended.
fit_gd = readRDS("./bayesian_models_cor/motr_eyetr_gaze_duration_cor.rds")
fit_gpt = readRDS("./bayesian_models_cor/motr_eyetr_go_past_time_cor.rds")
fit_td = readRDS("./bayesian_models_cor/motr_eyetr_total_duration_cor.rds")
fit_reg = readRDS("./bayesian_models_cor/motr_eyetr_FPReg_cor.rds")

# models for drop 0s
# fit_gd = readRDS("./motr_eyetr_gaze_duration_cor_drop0s.rds")
# fit_gpt = readRDS("./motr_eyetr_go_past_time_cor_drop0s.rds")
# fit_td = readRDS("./motr_eyetr_total_duration_cor_drop0s.rds")
# fit_reg = readRDS("./motr_eyetr_FPReg_cor_drop0s.rds")

# Print the full Stan fit summaries for all four measures.
print('---------------------------- Gaze Duration--------------------------------------------')
print(fit_gd)
print('---------------------------- Go Past Time--------------------------------------------')
print(fit_gpt)
print('---------------------------- Total Duration--------------------------------------------')
print(fit_td)
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
print(fit_reg)

```

```{r, eval=FALSE}
# stan_trace(fit_gd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_gd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_gd, pars=c("rho", "mu", "sigma", "nu"))

# MCMC diagnostics for each fit. Kept as top-level expressions because the
# returned ggplot objects only render by auto-printing at top level.
# Gaze Duration
stan_trace(fit_gd)
stan_dens(fit_gd, separate_chains = TRUE)
stan_plot(fit_gd)

# Go Past Time
stan_trace(fit_gpt)
stan_dens(fit_gpt, separate_chains = TRUE)
stan_plot(fit_gpt)

# Total Duration
stan_trace(fit_td)
stan_dens(fit_td, separate_chains = TRUE)
stan_plot(fit_td)

# FPReg
stan_trace(fit_reg)
stan_dens(fit_reg, separate_chains = TRUE)
stan_plot(fit_reg)
```

```{r, fig.width=5, fig.height=10, eval=FALSE}
# Per-parameter trace/density panels for the gaze-duration fit.
# NOTE(review): the grid.arrange() call that would display these is commented
# out below, so p1..p12 are currently built but unused.
p1 <- stan_trace(fit_gd, pars = 'rho', inc_warmup = FALSE)
p2 <- stan_dens(fit_gd, pars = 'rho', separate_chains = TRUE)
p3 <- stan_trace(fit_gd, pars = 'mu[1]', inc_warmup = FALSE)
p4 <- stan_dens(fit_gd, pars = 'mu[1]', separate_chains = TRUE)
p5 <- stan_trace(fit_gd, pars = 'mu[2]', inc_warmup = FALSE)
p6 <- stan_dens(fit_gd, pars = 'mu[2]', separate_chains = TRUE)
p7 <- stan_trace(fit_gd, pars = 'sigma[1]', inc_warmup = FALSE)
p8 <- stan_dens(fit_gd, pars = 'sigma[1]', separate_chains = TRUE)
p9 <- stan_trace(fit_gd, pars = 'sigma[2]', inc_warmup = FALSE)
p10 <- stan_dens(fit_gd, pars = 'sigma[2]', separate_chains = TRUE)
p11 <- stan_trace(fit_gd, pars = 'nu', inc_warmup = FALSE)
p12 <- stan_dens(fit_gd, pars = 'nu', separate_chains = TRUE)


# Use grid.arrange() to arrange the plots
# grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, ncol=2, nrow=6)
```


```{r}
# Posterior summaries of rho for each measure.
# Fixes: `mean` no longer shadows base::mean, and the HPD variable is named
# `hpd95` (it computes a 95% interval; it was misleadingly called hpd99).
# The cat() output is byte-identical to before.
print('---------------------------- Gaze Duration--------------------------------------------')
rho_gd = as.numeric(extract(fit_gd, "rho")[[1]])
rho_mean = mean(rho_gd)
crI = quantile(rho_gd, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_gd), prob=0.95)
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")

print('---------------------------- Go Past Time--------------------------------------------')
rho_gpt = as.numeric(extract(fit_gpt, "rho")[[1]])
rho_mean = mean(rho_gpt)
crI = quantile(rho_gpt, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_gpt), prob=0.95)
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")

print('---------------------------- Total Duration--------------------------------------------')
rho_td = as.numeric(extract(fit_td, "rho")[[1]])
rho_mean = mean(rho_td)
crI = quantile(rho_td, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_td), prob=0.95)
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")

print('---------------------------- First Pass Regression --------------------------------------------')
# The beta model stores rho as a matrix entry; extract the off-diagonal term.
rho_reg = as.numeric(extract(fit_reg, "rho[1, 2]")[[1]])
rho_mean = mean(rho_reg)
crI = quantile(rho_reg, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_reg), prob=0.95)
# NOTE(review): original had no trailing newline on this last cat; preserved.
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]", sep="")

```


```{r}
print('---------------------------- Gaze Duration--------------------------------------------')
# Posterior predictive draws (black) vs. observed pairs (red) for each measure,
# with 50/75% (orange) and 95/99% (blue) credible ellipses on top of a blank
# canvas (type = "n").
gd_rand <- extract(fit_gd, "x_rand")[[1]]

plot(1, 1, xlim = c(0, 400), ylim = c(0, 700), type = "n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Gaze Duration")
points(gd_rand, col = "black", pch = 16)
points(gd_temp, pch = 16, col = "red")
dataEllipse(gd_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(gd_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")

print('---------------------------- Go Past Time--------------------------------------------')
gpt_rand <- extract(fit_gpt, "x_rand")[[1]]

plot(1, 1, xlim = c(0, 800), ylim = c(0, 1200), type = "n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Go Past Time")
points(gpt_rand, col = "black", pch = 16)
points(gpt_temp, pch = 16, col = "red")
dataEllipse(gpt_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(gpt_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")

print('---------------------------- Total Duration--------------------------------------------')
td_rand <- extract(fit_td, "x_rand")[[1]]

plot(1, 1, xlim = c(0, 800), ylim = c(0, 1200), type = "n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Total Duration")
points(td_rand, col = "black", pch = 16)
points(td_temp, pch = 16, col = "red")
dataEllipse(td_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(td_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")

print('---------------------------- First Pass Regression --------------------------------------------')
reg_rand <- extract(fit_reg, "x_rand")[[1]]

plot(1, 1, xlim = c(0, 1), ylim = c(0, 1), type = "n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")
points(reg_rand, col = "black", pch = 16)
points(reg_temp, pch = 16, col = "red")
dataEllipse(reg_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(reg_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")
```

# model motr eyetr FPReg correlation (eyetr < 0.3)
```{r}
# FPReg (first-pass regression probability) correlations between MoTR and
# eye-tracking on (a) all data, (b) eyetr < 0.3 with zeros dropped, and
# (c) eyetr < 0.3 with zeros retained.  `provo_df` is built in earlier chunks.
print("First Pass Regression Prob. all and  < 0.3")

# All observations; zeros kept but smoothed to 1e-5 so downstream
# log/beta models stay finite.
reg_df_all = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5))

# Low-probability subset (< 0.3) with exact zeros removed before smoothing.
reg_df_low_drop0 = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value < 0.3)

# Low-probability subset (< 0.3) with zeros retained (smoothed only).
reg_df_low = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value < 0.3)

# Run each correlation test once and reuse the result, instead of calling
# cor.test() twice per comparison (once for $estimate, once for $p.value).
ct_all       = cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)
ct_low       = cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)
ct_low_drop0 = cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)
print(ct_all$estimate)
print(ct_all$p.value)
print(ct_low$estimate)
print(ct_low$p.value)
print(ct_low_drop0$estimate)
print(ct_low_drop0$p.value)

# Marginal densities of the two measures (columns 12:13 are assumed to be
# eyetr_value / motr_value -- TODO confirm if provo_df's layout changes).
reg_df_low %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Numeric matrices consumed by the Stan fitting chunks below.
reg_temp_all <- reg_df_all[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low <- reg_df_low[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low_drop0 <- reg_df_low_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()

# NOTE(review): mfrow reserves two panels but only one plot is drawn here.
par(mfrow = c(1, 2))
plot(reg_temp_low, pch = 16, col = "blue",
     main = "Not Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model FPReg < 0.3 ----------
# NOTE(review): despite the header above, this fits reg_temp_all (all data,
# zeros smoothed, NOT dropped), while the output filename says "_drop0s".
# At least one of header / data / filename is wrong -- confirm which
# subset this model was meant for before trusting the saved file.
reg_data = list(x=reg_temp_all, N=nrow(reg_temp_all))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model; blank out the compiled DSO so the .rds stays small and
# is not tied to this machine's compiled binary.
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models_cor/motr_eyetr_FPReg_cor_all_data_drop0s.rds"))
```


# model motr eyetr FPReg correlation (eyetr >= 0.3)
```{r}
# FPReg correlations for the high-probability regime (eyetr >= 0.3),
# both with zeros dropped and with zeros retained (smoothed to 1e-5).
print("First Pass Regression Prob. >= 0.3")

# High subset with exact zeros removed before smoothing.
reg_df_high_drop0 = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value >= 0.3)

# High subset with zeros retained (smoothed only).
reg_df_high = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value >= 0.3)

# Run each correlation test once and reuse it for estimate and p-value
# (the original called cor.test() twice per comparison).
ct_high       = cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)
ct_high_drop0 = cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)
print(ct_high$estimate)
print(ct_high$p.value)
print(ct_high_drop0$estimate)
print(ct_high_drop0$p.value)

# Marginal densities (columns 12:13 assumed to be the two value columns).
reg_df_high %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Matrices for the Stan fits below.
reg_temp_high <- reg_df_high[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_high_drop0 <- reg_df_high_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()

# Three scatter panels: all data, low subset, high subset
# (reg_temp_all / reg_temp_low come from the "< 0.3" chunk above).
# Fixed: the original comment said "two side-by-side plots" for a 1x3 layout.
par(mfrow = c(1, 3))
plot(reg_temp_all, pch = 16, col = "blue",
     main = "FPReg not logged all data")
plot(reg_temp_low, pch = 16, col = "blue",
     main = "FPReg not logged eyetr < 0.3 ")
plot(reg_temp_high, pch = 16, col = "blue",
     main = "FPReg not logged eyetr >= 0.3")
```

```{r, eval=FALSE}
# -------fit model FPReg >= 0.3 ----------
# Bivariate normal model on the high (>= 0.3) subset with zeros dropped;
# the output filename (..._03-1_drop0s.rds) matches the data used.
reg_data = list(x=reg_temp_high_drop0, N=nrow(reg_temp_high_drop0))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model; blank out the compiled DSO so the .rds stays small and
# portable across machines.
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models_cor/motr_eyetr_FPReg_cor_03-1_drop0s.rds"))
```



```{r}
# Reload the six saved FPReg correlation fits (all data / < 0.3 / >= 0.3,
# each with and without zero observations).
fit_mreg_all = readRDS("./bayesian_models_cor/motr_eyetr_FPReg_cor_all_data.rds")
fit_mreg_all_drop0 = readRDS("./bayesian_models_cor/motr_eyetr_FPReg_cor_all_data_drop0s.rds")
fit_mreg_low = readRDS("./bayesian_models_cor/motr_eyetr_FPReg_cor_00-03.rds")
fit_mreg_low_drop0 = readRDS("./bayesian_models_cor/motr_eyetr_FPReg_cor_00-03_drop0s.rds")
fit_mreg_high = readRDS("./bayesian_models_cor/motr_eyetr_FPReg_cor_03-1.rds")
fit_mreg_high_drop0 = readRDS("./bayesian_models_cor/motr_eyetr_FPReg_cor_03-1_drop0s.rds")
# NOTE(review): the "_all_data_drop0s" file is written by a chunk that fits
# reg_temp_all (zeros smoothed, not dropped) -- confirm the file contents
# actually match the name.

print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
print(fit_mreg_all)
print('---------------------------- First Pass Regression Prob. all data no 0s--------------------------------------------')
print(fit_mreg_all_drop0)
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
print(fit_mreg_low)
print('---------------------------- First Pass Regression Prob.< 0.3 no 0s--------------------------------------------')
print(fit_mreg_low_drop0)
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
print(fit_mreg_high)
print('---------------------------- First Pass Regression Prob.>= 0.3 no 0s--------------------------------------------')
print(fit_mreg_high_drop0)
```

```{r}
# MCMC diagnostics (trace, per-chain density, interval plots) for the FPReg
# fits.  The all-data and < 0.3 diagnostics are currently disabled; only
# the >= 0.3 fits are inspected.
# # FPReg all data
# stan_trace(fit_mreg_all)
# stan_dens(fit_mreg_all, separate_chains = TRUE)
# stan_plot(fit_mreg_all)

# stan_trace(fit_mreg_all_drop0)
# stan_dens(fit_mreg_all_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_all_drop0)

# # FPReg < 0.3
# stan_trace(fit_mreg_low)
# stan_dens(fit_mreg_low, separate_chains = TRUE)
# stan_plot(fit_mreg_low)
# 
# stan_trace(fit_mreg_low_drop0)
# stan_dens(fit_mreg_low_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_low_drop0)

# FPReg >= 0.3
stan_trace(fit_mreg_high)
stan_dens(fit_mreg_high, separate_chains = TRUE)
stan_plot(fit_mreg_high)

stan_trace(fit_mreg_high_drop0)
stan_dens(fit_mreg_high_drop0, separate_chains = TRUE)
stan_plot(fit_mreg_high_drop0)
```

```{r}
# Posterior summaries of the correlation parameter rho for the six FPReg
# fits.  The repeated mean/quantile/HPD boilerplate is factored into one
# helper.  This also fixes two issues in the original: a local variable
# named `mean` shadowed base::mean, and the interval variable was named
# `hpd99` although prob = 0.95.
summarise_rho <- function(rho) {
  # rho: numeric vector of posterior draws of the correlation parameter.
  # Prints posterior mean, 95% HPD interval, and central 95% CrI
  # (output format identical to the original inline code).
  mean_rho = mean(rho)
  crI = quantile(rho, c(.025, .975))
  hpd95 = HPDinterval(as.mcmc(rho), prob=0.95)
  cat("Mean: ", mean_rho, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]",
      "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")
}

print('---------------------------- First Pass Regression all data--------------------------------------------')
rho_mreg_all = as.numeric(extract(fit_mreg_all, "rho")[[1]])
summarise_rho(rho_mreg_all)

print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
rho_mreg_all_drop0 = as.numeric(extract(fit_mreg_all_drop0, "rho")[[1]])
summarise_rho(rho_mreg_all_drop0)

print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
rho_mreg_low = as.numeric(extract(fit_mreg_low, "rho")[[1]])
summarise_rho(rho_mreg_low)

print('---------------------------- First Pass Regression < 0.3 no 0s--------------------------------------------')
rho_mreg_low_drop0 = as.numeric(extract(fit_mreg_low_drop0, "rho")[[1]])
summarise_rho(rho_mreg_low_drop0)

print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
rho_mreg_high = as.numeric(extract(fit_mreg_high, "rho")[[1]])
summarise_rho(rho_mreg_high)

print('---------------------------- First Pass Regression >= 0.3 no 0s--------------------------------------------')
rho_mreg_high_drop0 = as.numeric(extract(fit_mreg_high_drop0, "rho")[[1]])
summarise_rho(rho_mreg_high_drop0)
```

```{r}
# Posterior-predictive checks for the FPReg fits: overlay simulated
# (eyetr, motr) draws (black), observed data (red), and concentration
# ellipses (orange = 50/75%, blue = 95/99%).
print('---------------------------- First Pass Regression all data --------------------------------------------')
mallreg_rand <- extract(fit_mreg_all, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mallreg_rand[,1], mallreg_rand[,2], col = "black", pch = 16)
# add the observed data points in red
points(reg_temp_all, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mallreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mallreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
mallreg_rand_drop0 <- extract(fit_mreg_all_drop0, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mallreg_rand_drop0[,1], mallreg_rand_drop0[,2], col = "black", pch = 16)
# NOTE(review): the drop-0 fit is overlaid with reg_temp_all (zeros kept);
# confirm this is the intended comparison data.
points(reg_temp_all, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mallreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mallreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
mlowreg_rand <- extract(fit_mreg_low, "x_rand")[[1]]
# NOTE(review): this prints the entire posterior draw matrix to the
# console; looks like leftover debugging output.
print(mlowreg_rand)
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mlowreg_rand[,1], mlowreg_rand[,2], col = "black", pch = 16)
# add the observed data points in red
points(reg_temp_low, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mlowreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlowreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression < 0.3 no 0s --------------------------------------------')
mlowreg_rand_drop0 <- extract(fit_mreg_low_drop0, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mlowreg_rand_drop0[,1], mlowreg_rand_drop0[,2], col = "black", pch = 16)
# add the observed data points in red
points(reg_temp_low_drop0, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mlowreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlowreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
mhighreg_rand_samples <- extract(fit_mreg_high, "x_rand")[[1]]
# print(mhighreg_rand_samples)
# Subsample 900 posterior draws to keep the scatter readable; relies on the
# global set.seed() in the setup chunk for reproducibility.
selected_indices <- sample(1:nrow(mhighreg_rand_samples), 900)
mhighreg_rand <- mhighreg_rand_samples[selected_indices, ]
# mhighreg_rand <- extract(fit_mreg_high, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mhighreg_rand[,1], mhighreg_rand[,2], col = "black", pch = 16)
# add the observed data points in red
points(reg_temp_high, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mhighreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mhighreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

# print('---------------------------- First Pass Regression >= 0.3 no 0s --------------------------------------------')
mhighreg_rand_drop0_samples <- extract(fit_mreg_high_drop0, "x_rand")[[1]]
selected_indices <- sample(1:nrow(mhighreg_rand_drop0_samples), 900)
mhighreg_rand_drop0 <- mhighreg_rand_drop0_samples[selected_indices, ]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color
points(mhighreg_rand_drop0[,1], mhighreg_rand_drop0[,2], col = "black", pch = 16)
# add the observed data points in red
points(reg_temp_high_drop0, pch=16, col="red")

# add dataEllipse with color
dataEllipse(mhighreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mhighreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

```


# model eye tracking to eye tracking correlation
```{r}

print("Gaze Duration")
# View(provo_eyetr_grouped_df)

# Eye-tracking split-half reliability for gaze duration: spread the two
# half values into value_1/value_2 and floor them at 1 ("smoothing")
# so later log-scale models stay finite.
egd_df = provo_eyetr_grouped_df %>% filter(metric == "gaze_duration") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
print(cor.test(egd_df$eyetr_value_1, egd_df$eyetr_value_2)$estimate)

# View(egd_df)

# Marginal densities of the two halves (columns 5:6 assumed to be
# eyetr_value_1 / eyetr_value_2 -- TODO confirm if the layout changes).
egd_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Matrix input for the Stan correlation model.
egd_temp <- egd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
# NOTE(review): mfrow reserves two panels but only one plot is drawn.
par(mfrow = c(1, 2))
# Plot the data matrix egd_temp
plot(egd_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")

```

```{r, eval=FALSE}
# Fit the bivariate eyetr-vs-eyetr correlation model for gaze duration:
# 4 chains x 4000 iterations, fixed seed for reproducibility.
egd_data = list(
  x = egd_temp,
  N = nrow(egd_temp)
)

fit_egd = stan(
  file = "stan_models/bivariate_correlation.stan",
  data = egd_data,
  iter = 4000,
  chains = 4,
  cores = 8,
  seed = 444,
  verbose = FALSE
)

# Strip the compiled dynamic shared object before serialising so the saved
# fit is small and not pinned to a machine-specific binary.
fit_egd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egd, file = "./bayesian_models_cor/eyetr_eyetr_gaze_duration_cor.rds")
```

```{r}
print("Go Past Time")

# Eye-tracking split-half reliability for go-past time: spread the two
# half values into value_1/value_2 and floor them at 1 ("smoothing").
egpt_df = provo_eyetr_grouped_df %>% filter(metric == "go_past_time") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
print(cor.test(egpt_df$eyetr_value_1, egpt_df$eyetr_value_2)$estimate)

# View(egd_df)

# Marginal densities of the two halves (columns 5:6 assumed to be the
# two value columns).
egpt_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Matrix input for the Stan correlation model.
egpt_temp <- egpt_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
# NOTE(review): mfrow reserves two panels but only one plot is drawn.
par(mfrow = c(1, 2))
# Plot the data matrix egpt_temp
plot(egpt_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
```

```{r, eval=FALSE}
# Fit the bivariate eyetr-vs-eyetr correlation model for go-past time:
# 4 chains x 4000 iterations, fixed seed for reproducibility.
egpt_data = list(
  x = egpt_temp,
  N = nrow(egpt_temp)
)

fit_egpt = stan(
  file = "stan_models/bivariate_correlation.stan",
  data = egpt_data,
  iter = 4000,
  chains = 4,
  cores = 8,
  seed = 444,
  verbose = FALSE
)

# Strip the compiled dynamic shared object before serialising so the saved
# fit is small and not pinned to a machine-specific binary.
fit_egpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egpt, file = "./bayesian_models_cor/eyetr_eyetr_go_past_time_cor.rds")
```


```{r}
print("Total Duration")

# Eye-tracking split-half reliability for total duration: spread the two
# half values into value_1/value_2 and floor them at 1 ("smoothing").
etd_df = provo_eyetr_grouped_df %>% filter(metric == "total_duration") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
print(cor.test(etd_df$eyetr_value_1, etd_df$eyetr_value_2)$estimate)

# View(egd_df)

# Marginal densities of the two halves (columns 5:6 assumed to be the
# two value columns).
etd_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Matrix input for the Stan correlation model.
etd_temp <- etd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
# NOTE(review): mfrow reserves two panels but only one plot is drawn.
par(mfrow = c(1, 2))
# Plot the data matrix etd_temp
plot(etd_temp, pch = 16, col = "blue",
     main = "Total Duration Not Log-Transformed")
```

```{r, eval=FALSE}
# Fit the bivariate eyetr-vs-eyetr correlation model for total duration.
etd_data = list(x=etd_temp, N=nrow(etd_temp))

fit_etd = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=etd_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444
  # Fixed: removed the trailing comma left behind when `verbose` was
  # commented out; it passed an extra empty (missing) argument to stan().
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model; blank out the compiled DSO so the .rds stays small and
# portable across machines.
fit_etd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_etd, file = paste0("./bayesian_models_cor/eyetr_eyetr_total_duration_cor.rds"))
```


```{r}
# Eye-tracking split-half reliability for first-pass regression probability.
print("First Pass Regression Prob.")  # fixed typo: was "Fisrt"

# NOTE(review): the row_number() filter drops four hard-coded rows
# (443-446).  This is fragile -- it silently removes different rows if the
# upstream data ever changes; consider filtering by content instead.
ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5)
  )
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)

# Marginal densities of the two halves (columns 5:6 assumed to be
# eyetr_value_1 / eyetr_value_2).
ereg_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Matrix input for the Stan model; NA rows removed first.
ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()

# NOTE(review): mfrow reserves two panels but only one plot is drawn.
par(mfrow = c(1, 2))
plot(ereg_temp, pch = 16, col = "blue",
     main = "FPReg Not Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model FPReg ----------
# NOTE(review): this saves to "..._FPReg_cor5.rds", but the loading chunk
# below reads "..._FPReg_cor.rds" -- confirm which version is current.

# View(ereg_temp)
ereg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_ereg = stan(
  file="stan_models/bivariate_normal_reg.stan", 
  data=ereg_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99),
  verbose = FALSE
  )

# Save the model; blank out the compiled DSO so the .rds stays small and
# portable across machines.
fit_ereg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_ereg, file = paste0("./bayesian_models_cor/eyetr_eyetr_FPReg_cor5.rds"))
```


```{r}
# Reload the four saved eyetr-eyetr correlation fits and print their
# Stan summaries.
fit_egd = readRDS("./bayesian_models_cor/eyetr_eyetr_gaze_duration_cor.rds")
fit_egpt = readRDS("./bayesian_models_cor/eyetr_eyetr_go_past_time_cor.rds")
fit_etd = readRDS("./bayesian_models_cor/eyetr_eyetr_total_duration_cor.rds")
# NOTE(review): the FPReg fitting chunk above saves "..._cor5.rds", but this
# reads "..._cor.rds" -- confirm which file holds the current fit.
fit_ereg = readRDS("./bayesian_models_cor/eyetr_eyetr_FPReg_cor.rds")
print('---------------------------- Gaze Duration--------------------------------------------')
print(fit_egd)
print('---------------------------- Go Past Time--------------------------------------------')
print(fit_egpt)
print('---------------------------- Total Duration--------------------------------------------')
print(fit_etd)
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
print(fit_ereg)
```


```{r, eval=FALSE}
# MCMC diagnostics (trace, per-chain density, interval plots) for the four
# eyetr-eyetr fits.
# stan_trace(fit_egd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_egd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_egd, pars=c("rho", "mu", "sigma", "nu"))

# Gaze Duration
stan_trace(fit_egd)
stan_dens(fit_egd, separate_chains = TRUE)
stan_plot(fit_egd)

# Go Past Time
stan_trace(fit_egpt)
stan_dens(fit_egpt, separate_chains = TRUE)
stan_plot(fit_egpt)

# Total Duration
stan_trace(fit_etd)
stan_dens(fit_etd, separate_chains = TRUE)
stan_plot(fit_etd)

# FPReg
stan_trace(fit_ereg)
stan_dens(fit_ereg, separate_chains = TRUE)
stan_plot(fit_ereg)
```


```{r}
# Posterior summaries (mean, 95% HPD, central 95% CrI) of the correlation
# parameter rho for each eyetr-eyetr fit.  Fixed: the summary variable was
# named `mean`, shadowing base::mean, and the HPD variable was named
# `hpd99` although prob = 0.95.  Printed output is unchanged.
print('---------------------------- Gaze Duration--------------------------------------------')
rho_egd = as.numeric(extract(fit_egd, "rho")[[1]])
mean_rho = mean(rho_egd)
crI = quantile(rho_egd, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_egd), prob=0.95)
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- Go Past Time--------------------------------------------')
rho_egpt = as.numeric(extract(fit_egpt, "rho")[[1]])
mean_rho = mean(rho_egpt)
crI = quantile(rho_egpt, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_egpt), prob=0.95)
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- Total Duration--------------------------------------------')
rho_etd = as.numeric(extract(fit_etd, "rho")[[1]])
mean_rho = mean(rho_etd)
crI = quantile(rho_etd, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_etd), prob=0.95)
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression --------------------------------------------')
rho_ereg = as.numeric(extract(fit_ereg, "rho")[[1]])
mean_rho = mean(rho_ereg)
crI = quantile(rho_ereg, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_ereg), prob=0.95)
# NOTE: unlike the stanzas above, the original output here had no trailing
# newline; preserved as-is.
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
```

```{r}
# Posterior-predictive checks for the eyetr-eyetr fits: simulated draws
# (black), observed data (red), concentration ellipses (orange = 50/75%,
# blue = 95/99%).
print('---------------------------- Gaze Duration--------------------------------------------')
egd_rand <- extract(fit_egd, "x_rand")[[1]]
# x_rand_filtered <- x_rand[apply(x_rand, 1, function(x) all(x > 0)),]
# x_rand_filtered

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 400), ylim=c(0, 700), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(egd_rand[,1], egd_rand[,2], col = "black", pch = 16)
# add the observed data points (egd_temp) in red
points(egd_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(egd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(egd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Go Past Time--------------------------------------------')
egpt_rand <- extract(fit_egpt, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Go Past Time") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(egpt_rand[,1], egpt_rand[,2], col = "black", pch = 16)
# add the observed data points (egpt_temp) in red
points(egpt_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(egpt_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(egpt_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Total Duration--------------------------------------------')
etd_rand <- extract(fit_etd, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Total Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(etd_rand[,1], etd_rand[,2], col = "black", pch = 16)
# add the observed data points (etd_temp) in red
points(etd_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(etd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(etd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression --------------------------------------------')
ereg_rand <- extract(fit_ereg, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "First Pass Regression") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(ereg_rand[,1], ereg_rand[,2], col = "black", pch = 16)
# add the observed data points (ereg_temp) in red
points(ereg_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(ereg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(ereg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
```


# Bayesian -- correlation between MoTR and word level statistics
```{r}
# Correlations between reading measures (MoTR / eye-tracking gaze duration)
# and word log-frequency.
print("Log Frequency")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$freq)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$freq)$estimate)

# View(stats_cor_df)
# Marginal densities (columns 7 and 13 assumed to be freq / motr_value --
# TODO confirm if the data layout changes).
stats_cor_df %>% 
  gather(measure, value, c(7, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Matrix inputs for the Stan models below.
mfreq_temp <- stats_cor_df[c("motr_value", "freq")] %>%
  data.matrix()
efreq_temp <- stats_cor_df[c("eyetr_value", "freq")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# MoTR vs frequency
plot(mfreq_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Word Frequency")

# Eye-tracking vs frequency
plot(efreq_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Word Frequency")

```
# motr & frequency
```{r, eval=FALSE}
# Fit the MoTR-RT vs word log-frequency correlation model.
mfreq_data = list(x=mfreq_temp, N=nrow(mfreq_temp))

fit_mfreq = stan(
  file="stan_models/stats_correlation.stan", 
  data=mfreq_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444
  # Fixed: removed the trailing comma left behind when `verbose` was
  # commented out; it passed an extra empty (missing) argument to stan().
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model; blank out the compiled DSO so the .rds stays small and
# portable across machines.
fit_mfreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mfreq, file = paste0("./bayesian_models_cor/motr_freq_cor.rds"))
```

# eyetr & frequency
```{r, eval=FALSE}
# Fit the eye-tracking-RT vs word log-frequency correlation model.
efreq_data = list(x=efreq_temp, N=nrow(efreq_temp))

fit_efreq = stan(
  file="stan_models/stats_correlation.stan", 
  data=efreq_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444
  # Fixed: removed the trailing comma left behind when `verbose` was
  # commented out; it passed an extra empty (missing) argument to stan().
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model; blank out the compiled DSO so the .rds stays small and
# portable across machines.
fit_efreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_efreq, file = paste0("./bayesian_models_cor/eyetr_freq_cor.rds"))
```

```{r}
# Correlations between reading measures and word length.
print("Length")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$len)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$len)$estimate)

# View(stats_cor_df)
# Marginal densities (columns 9 and 13 assumed to be len / motr_value --
# TODO confirm if the data layout changes).
stats_cor_df %>% 
  gather(measure, value, c(9, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Matrix inputs for the Stan models below.
mlen_temp <- stats_cor_df[c("motr_value", "len")] %>%
  data.matrix()
elen_temp <- stats_cor_df[c("eyetr_value", "len")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# MoTR vs length
plot(mlen_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Word Length")

# Eye-tracking vs length
plot(elen_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Word Length")
```

# motr & length
```{r, eval=FALSE}
# Fit the MoTR-RT vs word-length correlation model.
# NOTE(review): this saves to "motr_len_cor2.rds", but the loading chunk
# below reads "motr_len_cor.rds" -- confirm which version is current.
mlen_data = list(x=mlen_temp, N=nrow(mlen_temp))

fit_mlen = stan(
  file="stan_models/stats_correlation_len_normal.stan", 
  data=mlen_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444
  # Fixed: removed the trailing comma left behind when `verbose` was
  # commented out; it passed an extra empty (missing) argument to stan().
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model; blank out the compiled DSO so the .rds stays small and
# portable across machines.
fit_mlen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mlen, file = paste0("./bayesian_models_cor/motr_len_cor2.rds"))
```

# eyetr & length
```{r, eval=FALSE}
# Fit the eye-tracking-RT vs word-length correlation model.
elen_data = list(x=elen_temp, N=nrow(elen_temp))

fit_elen = stan(
  file="stan_models/stats_correlation_len_normal.stan", 
  data=elen_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444
  # Fixed: removed the trailing comma left behind when `verbose` was
  # commented out; it passed an extra empty (missing) argument to stan().
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model; blank out the compiled DSO so the .rds stays small and
# portable across machines.
fit_elen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_elen, file = paste0("./bayesian_models_cor/eyetr_len_cor.rds"))
```


```{r}
# Correlations between reading measures and word surprisal.
print("Surprisal")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$surp)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$surp)$estimate)

# View(stats_cor_df)
# Marginal densities (columns 8 and 13 assumed to be surp / motr_value --
# TODO confirm if the data layout changes).
stats_cor_df %>% 
  gather(measure, value, c(8, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Matrix inputs for the Stan models below.
msurp_temp <- stats_cor_df[c("motr_value", "surp")] %>%
  data.matrix()
esurp_temp <- stats_cor_df[c("eyetr_value", "surp")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# MoTR vs surprisal
plot(msurp_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Surprisal")

# Eye-tracking vs surprisal
plot(esurp_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Surprisal")
```
# motr & surprisal
```{r, eval=FALSE}
# Fit the MoTR-RT vs surprisal correlation model.
msurp_data = list(x=msurp_temp, N=nrow(msurp_temp))

fit_msurp = stan(
  file="stan_models/stats_correlation.stan", 
  data=msurp_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444
  # Fixed: removed the trailing comma left behind when `verbose` was
  # commented out; it passed an extra empty (missing) argument to stan().
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model; blank out the compiled DSO so the .rds stays small and
# portable across machines.
fit_msurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_msurp, file = paste0("./bayesian_models_cor/motr_surp_cor.rds"))
```

# eyetr & surprisal
```{r, eval=FALSE}
# Bayesian correlation between eye-tracking RTs and surprisal.
esurp_data <- list(N = nrow(esurp_temp), x = esurp_temp)

fit_esurp <- stan(
  file   = "stan_models/stats_correlation.stan",
  data   = esurp_data,
  chains = 4,
  iter   = 4000,
  cores  = 8,
  seed   = 444
)

# Strip the compiled DSO so the cached fit stays small, then save it.
fit_esurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_esurp, file = "./bayesian_models_cor/eyetr_surp_cor.rds")

```


```{r}
# Load the six cached correlation fits and print their full Stan summaries
# (posterior means, credible intervals, n_eff, Rhat).
fit_mfreq = readRDS("./bayesian_models_cor/motr_freq_cor.rds")
fit_efreq = readRDS("./bayesian_models_cor/eyetr_freq_cor.rds")
# NOTE(review): the fitting chunk above saves "motr_len_cor2.rds", but this
# loads "motr_len_cor.rds" -- confirm which version is intended.
fit_mlen = readRDS("./bayesian_models_cor/motr_len_cor.rds")
fit_elen = readRDS("./bayesian_models_cor/eyetr_len_cor.rds")
fit_msurp = readRDS("./bayesian_models_cor/motr_surp_cor.rds")
fit_esurp = readRDS("./bayesian_models_cor/eyetr_surp_cor.rds")

print('---------------------------- MoTR & Log Frequency --------------------------------------------')
print(fit_mfreq)
print('---------------------------- EyeTR & Log Frequency --------------------------------------------')
print(fit_efreq)
print('---------------------------- MoTR & Length --------------------------------------------')
print(fit_mlen)
print('---------------------------- EyeTR & Length --------------------------------------------')
print(fit_elen)
print('---------------------------- MoTR & Surprisal --------------------------------------------')
print(fit_msurp)
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
print(fit_esurp)

```

```{r, eval=FALSE}
# MoTR & Log Freq
stan_trace(fit_mfreq)
stan_dens(fit_mfreq, separate_chains = TRUE)
stan_plot(fit_mfreq)

# EyeTR & Log Freq
stan_trace(fit_efreq)
stan_dens(fit_efreq, separate_chains = TRUE)
stan_plot(fit_efreq)

# MoTR & Len
stan_trace(fit_mlen)
stan_dens(fit_mlen, separate_chains = TRUE)
stan_plot(fit_mlen)

# EyeTR & Len
stan_trace(fit_elen)
stan_dens(fit_elen, separate_chains = TRUE)
stan_plot(fit_elen)

# MoTR & Surprisal
stan_trace(fit_msurp)
stan_dens(fit_msurp, separate_chains = TRUE)
stan_plot(fit_msurp)

# EyeTR & Surprisal
stan_trace(fit_esurp)
stan_dens(fit_esurp, separate_chains = TRUE)
stan_plot(fit_esurp)

```


```{r}
# Summarise the posterior of the correlation parameter `rho` for one fitted
# Stan model: posterior mean, 95% HPD interval, and the 2.5%/97.5%
# equal-tailed credible interval. Extracted into a helper to replace six
# near-identical copies. Also fixes two issues in the original:
#   * a local variable named `mean` shadowed base::mean (bug risk);
#   * `hpd99` actually held a 95% interval (misleading name).
# Output format is unchanged except that the final summary now also ends
# with a newline (the original omitted it only for the last fit).
report_rho <- function(fit, label) {
  print(label)
  rho <- as.numeric(extract(fit, "rho")[[1]])
  hpd95 <- HPDinterval(as.mcmc(rho), prob = 0.95)
  cri <- quantile(rho, c(.025, .975))
  cat("Mean: ", mean(rho),
      "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
      "\ncrI: [", cri[1], ", ", cri[2], "]\n", sep = "")
}

report_rho(fit_mfreq, '---------------------------- MoTR & Log Freq --------------------------------------------')
report_rho(fit_efreq, '---------------------------- EyeTR & Log Freq --------------------------------------------')
report_rho(fit_mlen,  '---------------------------- MoTR & Length --------------------------------------------')
report_rho(fit_elen,  '---------------------------- EyeTR & Length --------------------------------------------')
report_rho(fit_msurp, '---------------------------- MoTR & Surprisal --------------------------------------------')
report_rho(fit_esurp, '---------------------------- EyeTR & Surprisal --------------------------------------------')

```



```{r}
# Posterior-predictive check for one correlation fit: overlay the model's
# simulated bivariate draws (`x_rand`, black) on the observed data (red),
# with 50/75% (orange) and 95/99% (blue) data ellipses. Replaces six
# copy-pasted plot sections (whose comments also wrongly referenced
# "gd_temp" throughout).
ppc_cor_plot <- function(fit, obs, xlim, ylim, xlab, ylab) {
  rand <- extract(fit, "x_rand")[[1]]
  # type = "n" draws an empty canvas so the layers can be added in order.
  plot(1, 1, xlim = xlim, ylim = ylim, type = "n",
       xlab = xlab, ylab = ylab, main = "Gaze Duration")
  points(rand[, 1], rand[, 2], col = "black", pch = 16)  # simulated draws
  points(obs, pch = 16, col = "red")                     # observed data
  dataEllipse(rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
  dataEllipse(rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")
}

print('---------------------------- MoTR & Log Frequency--------------------------------------------')
ppc_cor_plot(fit_mfreq, mfreq_temp, xlim = c(0, 800), ylim = c(0, 12),
             xlab = "MoTR value", ylab = "Log Frequency")

print('---------------------------- EyeTR & Log Frequency--------------------------------------------')
ppc_cor_plot(fit_efreq, efreq_temp, xlim = c(0, 500), ylim = c(0, 12),
             xlab = "Eye tracking value", ylab = "Log Frequency")

print('---------------------------- MoTR & Length --------------------------------------------')
ppc_cor_plot(fit_mlen, mlen_temp, xlim = c(0, 800), ylim = c(0, 20),
             xlab = "MoTR value", ylab = "Word Length")

print('---------------------------- EyeTR & Length --------------------------------------------')
ppc_cor_plot(fit_elen, elen_temp, xlim = c(0, 800), ylim = c(0, 20),
             xlab = "EyeTR value", ylab = "Word Length")

print('---------------------------- MoTR & Surprisal --------------------------------------------')
ppc_cor_plot(fit_msurp, msurp_temp, xlim = c(0, 800), ylim = c(0, 20),
             xlab = "MoTR value", ylab = "Word Surprisal")

print('---------------------------- EyeTR & Surprisal --------------------------------------------')
ppc_cor_plot(fit_esurp, esurp_temp, xlim = c(0, 800), ylim = c(0, 20),
             xlab = "EyeTR value", ylab = "Word Surprisal")
```


```{r}
# EyeTR vs. EyeTR split-half agreement on first-pass regression probability
# (FPReg), for all items and for low (< 0.3) / high (>= 0.3) subsets.
# (Typo "Fisrt" in the status message fixed.)
print("EyeTR vs. EyeTR First Pass Regression Prob. < 0.3 ")

# Build the base data frame ONCE instead of repeating the same six-step
# pipeline three times. Rows 443-446 are excluded by position (presumably
# duplicates/outliers -- TODO confirm why), and zero probabilities are
# floored at 1e-5 so downstream models that log-transform stay finite.
ereg_df = provo_eyetr_grouped_df %>%
  filter(metric == "FPReg") %>%
  distinct() %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
  spread(measure, value) %>%
  mutate(eyetr_value_1 = pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5))

# Low/high regression-rate subsets, split on the first half's value.
ereg_df_low  = ereg_df %>% filter(eyetr_value_1 < 0.3)
ereg_df_high = ereg_df %>% filter(eyetr_value_1 >= 0.3)
# View(ereg_df_low)
# View(ereg_df_high)

# Pearson correlations; run each cor.test once and reuse the result
# (the original recomputed every test for its estimate and p-value).
ct_all  = cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)
ct_low  = cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)
ct_high = cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)
print(ct_all$estimate)
print(ct_all$p.value)
print(ct_low$estimate)
print(ct_low$p.value)
print(ct_high$estimate)
print(ct_high$p.value)

# Marginal densities of the two halves. Columns 5:6 are selected by
# position -- presumably eyetr_value_1/eyetr_value_2 after the spread;
# fragile if the layout changes.
ereg_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

# Two-column matrices for the Stan correlation models.
ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()
ereg_temp_low <- ereg_df_low[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()
ereg_temp_high <- ereg_df_high[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()

# Side-by-side scatterplots: all data, low subset, high subset.
par(mfrow = c(1, 3))
plot(ereg_temp, pch = 16, col = "blue",
     main = "FPReg all data Not Log-Transformed")
plot(ereg_temp_low, pch = 16, col = "blue",
     main = "FPReg < 0.3 Not Log-Transformed")
plot(ereg_temp_high, pch = 16, col = "blue",
     main = "FPReg > 0.3 Not Log-Transformed")
```


```{r, eval=FALSE}
# -------fit model eyetr vs. eyetr FPReg <0.3 & >=0.3 ----------
# Fit the bivariate-normal correlation model to the full FPReg
# split-half data.
reg_data <- list(N = nrow(ereg_temp), x = ereg_temp)

fit_reg <- stan(
  file    = "stan_models/bivariate_normal_reg.stan",
  data    = reg_data,
  chains  = 4,
  iter    = 4000,
  cores   = 4,
  seed    = 444,
  verbose = FALSE
)

# Strip the compiled DSO so the cached fit stays small, then save it.
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = "./bayesian_models_cor/eyetr_eyetr_FPReg_cor_all_data.rds")
```


```{r}
# fit_ereg_all = readRDS("./eyetr_eyetr_FPReg_cor_all_data.rds")
# Load the cached FPReg correlation fits and print their Stan summaries.
fit_ereg_all  <- readRDS("./bayesian_models_cor/eyetr_eyetr_FPReg_cor.rds")
fit_ereg_low  <- readRDS("./bayesian_models_cor/eyetr_eyetr_FPReg_cor_00-03.rds")
fit_ereg_high <- readRDS("./bayesian_models_cor/eyetr_eyetr_FPReg_cor_03-1.rds")

fpreg_headers <- c(
  '---------------------------- First Pass Regression Prob. all data --------------------------------------------',
  '---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------',
  '---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------'
)
fpreg_fits <- list(fit_ereg_all, fit_ereg_low, fit_ereg_high)

for (i in seq_along(fpreg_fits)) {
  print(fpreg_headers[i])
  print(fpreg_fits[[i]])
}

```


```{r}
# # FPReg all data
stan_trace(fit_ereg_all)
stan_dens(fit_ereg_all, separate_chains = TRUE)
stan_plot(fit_ereg_all)

# # FPReg < 0.3
stan_trace(fit_ereg_low)
stan_dens(fit_ereg_low, separate_chains = TRUE)
stan_plot(fit_ereg_low)

# FPReg >= 0.3
stan_trace(fit_ereg_high)
stan_dens(fit_ereg_high, separate_chains = TRUE)
stan_plot(fit_ereg_high)
```

```{r}
print('---------------------------- First Pass Regression all data--------------------------------------------')
# Summarise the posterior of `rho` for one FPReg fit: posterior mean,
# 95% HPD interval, and 2.5%/97.5% equal-tailed credible interval.
# Replaces three near-identical copies; also avoids shadowing base::mean
# with a variable named `mean`, and renames the misleading `hpd99`
# (it held a 95% interval). Printed output is unchanged.
summarise_rho <- function(fit, label) {
  print(label)
  rho <- as.numeric(extract(fit, "rho")[[1]])
  hpd95 <- HPDinterval(as.mcmc(rho), prob = 0.95)
  cri <- quantile(rho, c(.025, .975))
  cat("Mean: ", mean(rho),
      "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
      "\ncrI: [", cri[1], ", ", cri[2], "]\n", sep = "")
}

summarise_rho(fit_ereg_all,  '---------------------------- First Pass Regression all data--------------------------------------------')
summarise_rho(fit_ereg_low,  '---------------------------- First Pass Regression < 0.3--------------------------------------------')
summarise_rho(fit_ereg_high, '---------------------------- First Pass Regression >= 0.3--------------------------------------------')
```

```{r}
# Posterior-predictive overlay for one FPReg correlation fit: simulated
# draws (`x_rand`, black) over the observed points (red), with 50/75%
# (orange) and 95/99% (blue) data ellipses. Replaces three copy-pasted
# plot sections. NOTE(review): the y-label says "MoTR value" although the
# data appear to be two eye-tracking halves -- confirm the labels.
plot_fpreg_ppc <- function(rand, obs) {
  # type = "n" draws an empty unit-square canvas to layer points onto.
  plot(1, 1, xlim = c(0, 1), ylim = c(0, 1), type = "n",
       xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")
  points(rand[, 1], rand[, 2], col = "black", pch = 16)  # simulated draws
  points(obs, pch = 16, col = "red")                     # observed data
  dataEllipse(rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
  dataEllipse(rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")
}

print('---------------------------- First Pass Regression all data --------------------------------------------')
plot_fpreg_ppc(extract(fit_ereg_all, "x_rand")[[1]], ereg_temp)

print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
plot_fpreg_ppc(extract(fit_ereg_low, "x_rand")[[1]], ereg_temp_low)

print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
# Subsample 900 posterior draws so the overlay stays readable.
# NOTE: sample() here depends on the session RNG state (seed set at file top).
ehighreg_rand_samples <- extract(fit_ereg_high, "x_rand")[[1]]
selected_indices <- sample(seq_len(nrow(ehighreg_rand_samples)), 900)
plot_fpreg_ppc(ehighreg_rand_samples[selected_indices, ], ereg_temp_high)

```
